{"object":"list","data":[{"id":"anthropic/claude-haiku-4.5","object":"model","created":1760486400,"owned_by":"bedrock","canonical_slug":"claude-haiku-4-5-20251001","name":"Anthropic: Claude Haiku 4.5","description":"Anthropic's fastest and most cost-effective model optimized for near-instant responsiveness. Features extended thinking capabilities and excels at coding tasks.","context_length":200000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"claude","instruct_type":null},"pricing":{"prompt":"0.000001","completion":"0.000005","input_cache_read":"0.0000001","input_cache_write_5m":"0.00000125","input_cache_write_1h":"0.000002","web_search":"0.015"},"top_provider":{"context_length":200000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["claude-haiku-4.5","claude-haiku-4-5","claude-haiku-4-5-20251001"]},{"id":"anthropic/claude-opus-4.5","object":"model","created":1763942400,"owned_by":"bedrock","canonical_slug":"claude-opus-4-5-20251101","name":"Anthropic: Claude Opus 4.5","description":"Anthropic's most powerful model with extended thinking, agentic workflows, and computer use support. 
Excels at complex analysis, creative writing, and multi-step reasoning tasks.","context_length":200000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"claude","instruct_type":null},"pricing":{"prompt":"0.000005","completion":"0.000025","input_cache_read":"0.0000005","input_cache_write_5m":"0.00000625","input_cache_write_1h":"0.00001","web_search":"0.015"},"top_provider":{"context_length":200000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["claude-opus-4.5","claude-opus-4-5","claude-opus-4-5-20251101"]},{"id":"anthropic/claude-opus-4.6","object":"model","created":1770249600,"owned_by":"bedrock","canonical_slug":"claude-opus-4-6-20260205","name":"Anthropic: Claude Opus 4.6","description":"Opus 4.6 is Anthropic’s strongest model for coding and long-running professional tasks. It is built for agents that operate across entire workflows rather than single prompts, making it especially effective for large codebases, complex refactors, and multi-step debugging that unfolds over time. The model shows deeper contextual understanding, stronger problem decomposition, and greater reliability on hard engineering tasks than prior generations. Beyond coding, Opus 4.6 excels at sustained knowledge work. It produces near-production-ready documents, plans, and analyses in a single pass, and maintains coherence across very long outputs and extended sessions. 
This makes it a strong default for tasks that require persistence, judgment, and follow-through, such as technical design, migration planning, and end-to-end project execution.","context_length":1000000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"claude","instruct_type":null},"pricing":{"prompt":"0.000005","completion":"0.000025","input_cache_read":"0.0000005","input_cache_write_5m":"0.00000625","input_cache_write_1h":"0.00001","web_search":"0.015"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["claude-opus-4.6","claude-opus-4-6","claude-opus-4-6-20260205"]},{"id":"anthropic/claude-opus-4.7","object":"model","created":1776297600,"owned_by":"bedrock","canonical_slug":"claude-opus-4-7","name":"Anthropic: Claude Opus 4.7","description":"Opus 4.7 is the next generation of Anthropic's Opus family, built for long-running, asynchronous agents. Building on the coding and agentic strengths of Opus 4.6, it delivers stronger performance on complex, multi-step tasks and more reliable agentic execution across extended workflows. 
It is especially effective for asynchronous agent pipelines where tasks unfold over time - large codebases, multi-stage debugging, and end-to-end project orchestration.","context_length":1000000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"claude","instruct_type":null},"pricing":{"prompt":"0.000005","completion":"0.000025","input_cache_read":"0.0000005","input_cache_write_5m":"0.00000625","input_cache_write_1h":"0.00001","web_search":"0.015"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["claude-opus-4.7","claude-opus-4-7","claude-opus-4-7-20260416"]},{"id":"anthropic/claude-sonnet-4.5","object":"model","created":1759104000,"owned_by":"bedrock","canonical_slug":"claude-sonnet-4-5-20250929","name":"Anthropic: Claude Sonnet 4.5","description":"Anthropic's balanced model with state-of-the-art performance on SWE-bench Verified. 
Ideal for coding, analysis, and general-purpose tasks with excellent cost-performance ratio.","context_length":1000000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"claude","instruct_type":null},"pricing":{"prompt":"0.000003","completion":"0.000015","input_cache_read":"0.0000003","input_cache_write_5m":"0.00000375","input_cache_write_1h":"0.000006","web_search":"0.015"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["claude-sonnet-4.5","claude-sonnet-4-5","claude-sonnet-4-5-20250929"]},{"id":"anthropic/claude-sonnet-4.6","object":"model","created":1771286400,"owned_by":"bedrock","canonical_slug":"claude-sonnet-4-6-20260217","name":"Anthropic: Claude Sonnet 4.6","description":"Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. 
It excels at iterative development, complex codebase navigation, end-to-end project management with memory, polished document creation, and confident computer use for web QA and workflow automation.","context_length":1000000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"claude","instruct_type":null},"pricing":{"prompt":"0.000003","completion":"0.000015","input_cache_read":"0.0000003","input_cache_write_5m":"0.00000375","input_cache_write_1h":"0.000006","web_search":"0.015"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["claude-sonnet-4.6","claude-sonnet-4-6","claude-sonnet-4-6-20260217"]},{"id":"bailian/qwen-flash","object":"model","created":1753660800,"owned_by":"dashscope","canonical_slug":"qwen-flash","name":"Qwen Flash","description":"Alibaba Qwen Flash via Dashscope. Ultra-fast inference for latency-sensitive tasks. 
Lowest cost option in the Qwen family.","context_length":1000000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.000000022","completion":"0.00000022","input_cache_read":"0.0000000043","input_cache_write":"0.000000027","web_search":"0.01"},"top_provider":{"context_length":1000000,"max_completion_tokens":32000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["qwen-flash"]},{"id":"bailian/qwen-max","object":"model","created":1737763200,"owned_by":"dashscope","canonical_slug":"qwen-max","name":"Qwen Max","description":"Alibaba Qwen Max via Dashscope. High-performance model for complex tasks requiring sophisticated reasoning and generation.","context_length":32000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.00000035","completion":"0.00000138","input_cache_read":"0.000000069"},"top_provider":{"context_length":32000,"max_completion_tokens":8000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["qwen-max"]},{"id":"bailian/qwen-plus","object":"model","created":1764547200,"owned_by":"dashscope","canonical_slug":"qwen-plus","name":"Qwen Plus","description":"Alibaba Qwen Plus via Dashscope. Balanced performance and cost for general tasks. 
Strong Chinese and English bilingual capabilities.","context_length":1000000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.00000012","completion":"0.00000029","input_cache_read":"0.000000023"},"top_provider":{"context_length":1000000,"max_completion_tokens":32000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["qwen-plus"]},{"id":"bailian/qwen-turbo","object":"model","created":1752537600,"owned_by":"dashscope","canonical_slug":"qwen-turbo","name":"Qwen Turbo","description":"Alibaba Qwen Turbo via Dashscope. Fast and cost-effective for simple tasks requiring quick responses. Optimized for Chinese language understanding.","context_length":128000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.00000005","completion":"0.00000009","input_cache_read":"0.0000000086"},"top_provider":{"context_length":128000,"max_completion_tokens":16000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["qwen-turbo"]},{"id":"bailian/qwen-vl-max","object":"model","created":1755043200,"owned_by":"dashscope","canonical_slug":"qwen-vl-max","name":"Qwen VL Max","description":"Alibaba Qwen VL Max via Dashscope. 
Top-tier vision-language model for multimodal tasks including image understanding and visual reasoning.","context_length":128000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.00000023","completion":"0.00000058","input_cache_read":"0.000000046"},"top_provider":{"context_length":128000,"max_completion_tokens":8000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["qwen-vl-max"]},{"id":"bailian/qwen3-coder-flash","object":"model","created":1754352000,"owned_by":"dashscope","canonical_slug":"qwen3-coder-flash-2025-08-05","name":"Qwen3 Coder Flash","description":"基于Qwen3的代码生成模型，继承Qwen3-Coder-Plus的coding agent能力，支持多轮工具交互，重点优化仓库级别理解能力并增加工具调用稳定性。","context_length":1000000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.0000005","completion":"0.0000025","input_cache_read":"0.00000006","input_cache_write":"0.00000027"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["qwen3-coder-flash"]},{"id":"bailian/qwen3-coder-next","object":"model","created":1771459200,"owned_by":"dashscope","canonical_slug":"qwen3-coder-next-2026-02-19","name":"Qwen3 Coder Next","description":"Qwen3系列新一代代码生成模型，效果接近Qwen3-Coder-Plus兼具更优性能。模型重点优化仓库级别理解、支持多轮工具交互、提升对于agentic 
coding类工具的适配能力。","context_length":256000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.0000002","completion":"0.0000015"},"top_provider":{"context_length":256000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["qwen3-coder-next"]},{"id":"bailian/qwen3-coder-plus","object":"model","created":1758585600,"owned_by":"dashscope","canonical_slug":"qwen3-coder-plus-2025-09-23","name":"Qwen3 Coder Plus","description":"基于Qwen3的代码生成模型，具有强大的Coding Agent能力，擅长工具调用和环境交互，能够实现自主编程、代码能力卓越的同时兼具通用能力。","context_length":1000000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.0000018","completion":"0.000009","input_cache_read":"0.0000002","input_cache_write":"0.000001"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["qwen3-coder-plus"]},{"id":"bailian/qwen3-max","object":"model","created":1769126400,"owned_by":"dashscope","canonical_slug":"qwen3-max","name":"Qwen3 Max","description":"Alibaba Qwen3 Max via Dashscope. 
Latest flagship model with strong reasoning, long context, and enhanced coding capabilities.","context_length":256000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.00000036","completion":"0.00000143","input_cache_read":"0.000000072","web_search":"0.01"},"top_provider":{"context_length":256000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["qwen3-max"]},{"id":"bailian/qwen3.5-122b-a10b","object":"model","created":1771804800,"owned_by":"dashscope","canonical_slug":"qwen3.5-122b-a10b","name":"Qwen: Qwen3.5 122B A10B","description":"Qwen3.5系列122B-A10B原生视觉语言模型，基于混合架构设计，融合了线性注意力机制与稀疏混合专家模型，实现了更高的推理效率。该模型的综合表现仅次于Qwen3.5-397B-A17B，文本能力显著优于Qwen3-235B-2507，视觉能力优于Qwen3-VL-235B。","context_length":256000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.00000029","completion":"0.00000229","input_cache_read":"0.00000029","web_search":"0.01"},"top_provider":{"context_length":256000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["qwen3.5-122b-a10b"]},{"id":"bailian/qwen3.5-27b","object":"model","created":1771804800,"owned_by":"dashscope","canonical_slug":"qwen3.5-27b","name":"Qwen: Qwen3.5 
27B","description":"Qwen3.5系列27B原生视觉语言Dense模型，融合了线性注意力机制；响应速度快，兼具推理速度和性能。该模型的综合能力接近于Qwen3.5-122B-A10B。","context_length":256000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.00000029","completion":"0.00000205","input_cache_read":"0.00000029","web_search":"0.01"},"top_provider":{"context_length":256000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["qwen3.5-27b"]},{"id":"bailian/qwen3.5-35b-a3b","object":"model","created":1771804800,"owned_by":"dashscope","canonical_slug":"qwen3.5-35b-a3b","name":"Qwen: Qwen3.5 35B A3B","description":"Qwen3.5系列35B-A3B原生视觉语言模型，基于混合架构设计，融合了线性注意力机制与稀疏混合专家模型，实现了更高的推理效率。该模型的综合表现接近于Qwen3.5-27B。","context_length":256000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.00000029","completion":"0.00000183","input_cache_read":"0.00000029","web_search":"0.01"},"top_provider":{"context_length":256000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["qwen3.5-35b-a3b"]},{"id":"bailian/qwen3.5-397b-a17b","object":"model","created":1771804800,"owned_by":"dashscope","canonical_slug":"qwen3.5-397b-a17b","name":"Qwen: Qwen3.5 397B 
A17B","description":"Qwen3.5系列397B-A17B原生视觉语言模型，基于混合架构设计，融合了线性注意力机制与稀疏混合专家模型，实现了更高的推理效率。在语言理解、逻辑推理、代码生成、智能体任务、图像理解、视频理解、图形用户界面（GUI）等多种任务中，均展现出与当前顶尖前沿模型相媲美的卓越性能。具备强大的代码生成与智能体能力，对于各类智能体场景具有良好的泛化性。","context_length":256000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.00000055","completion":"0.0000035","input_cache_read":"0.00000055","web_search":"0.01"},"top_provider":{"context_length":256000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["qwen3.5-397b-a17b"]},{"id":"bailian/qwen3.5-flash","object":"model","created":1771804800,"owned_by":"dashscope","canonical_slug":"qwen3.5-flash-2026-02-23","name":"Qwen: Qwen3.5 
Flash","description":"Qwen3.5原生视觉语言系列Flash模型，基于混合架构设计，融合了线性注意力机制与稀疏混合专家模型，实现了更高的推理效率。模型效果在纯文本与多模态方面相较3系列均实现飞跃式进步；响应速度快，兼具推理速度和性能。","context_length":1000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.0000001","completion":"0.0000004","input_cache_read":"0.00000001","input_cache_write":"0.000000125","web_search":"0.01"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["qwen3.5-flash"]},{"id":"bailian/qwen3.5-plus","object":"model","created":1771200000,"owned_by":"dashscope","canonical_slug":"qwen3.5-plus-2026-02-15","name":"Qwen3.5 
Plus","description":"Qwen3.5原生视觉语言系列Plus模型，基于混合架构设计，融合了线性注意力机制与稀疏混合专家模型，实现了更高的推理效率。在多项任务评测中，3.5系列均展现出与当前顶尖前沿模型相媲美的卓越性能，模型效果在纯文本与多模态方面相较3系列均实现飞跃式进步。","context_length":1000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.0000004","completion":"0.0000024","input_cache_read":"0.00000004","input_cache_write":"0.0000004","web_search":"0.01"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["qwen3.5-plus"]},{"id":"bailian/qwen3.6-27b","object":"model","created":1776816000,"owned_by":"dashscope","canonical_slug":"qwen3.6-27b","name":"Qwen3.6 27B","description":"Qwen3.6系列27B原生视觉语言Dense模型，模型效果相较3.5-27B重点提升了Agentic coding能力、模型STEM与推理能力进一步增强；视觉模态方面在空间智能、物体定位与检测能力上显著增强，视频理解、文档OCR及视觉Agent能力稳步提升。","context_length":256000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.0000006","completion":"0.0000036","web_search":"0.01"},"top_provider":{"context_length":256000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["qwen3.6-27b"]},{"id":"bailian/qwen3.6-flash","object":"model","created":1776297600,"owned_by":"dashscope","canonical_slug":"qwen3.6-flash-2026-04-16","name":"Qwen3.6 
Flash","description":"Qwen3.6原生视觉语言系列Flash模型，模型效果相较3.5-Flash显著提升。本模型重点提升agentic coding能力（在多项代码智能体基准上大幅超越前代）、数学推理和代码推理能力；视觉方面在空间智能能力上显著增强，物体定位与目标检测提升尤为突出。","context_length":1000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.00000025","completion":"0.0000015","input_cache_read":"0.000000025","input_cache_write":"0.00000031","web_search":"0.01"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["qwen3.6-flash"]},{"id":"bailian/qwen3.6-max-preview","object":"model","created":1776643200,"owned_by":"dashscope","canonical_slug":"qwen3.6-max-preview","name":"Qwen3.6 Max Preview","description":"Qwen3.6系列中规模最大、综合能力最强的Max模型Preview版本，当前开放纯文本模型能力供体验。相较于此前发布的Qwen3-Max和Qwen3.6-Plus，本模型在vibe coding能力上进一步提升、coding 
agent执行更加高效、前端编程开发能力显著提升；长尾知识能力进一步升级。","context_length":256000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.000002","completion":"0.000012","input_cache_read":"0.0000002","input_cache_write":"0.000002","web_search":"0.01"},"top_provider":{"context_length":256000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["qwen3.6-max-preview"]},{"id":"bailian/qwen3.6-plus","object":"model","created":1775088000,"owned_by":"dashscope","canonical_slug":"qwen3.6-plus-2026-04-02","name":"Qwen3.6 Plus","description":"Qwen3.6原生视觉语言系列Plus模型，展现出与当前顶尖前沿模型相媲美的卓越性能，模型效果相较3.5系列显著提升。模型在Agentic coding、前端编程、Vibe coding等代码能力、多模态万物识别、OCR、物体定位等能力上显著增强。","context_length":1000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.0000005","completion":"0.000003","input_cache_read":"0.00000005","input_cache_write":"0.000000625","web_search":"0.01"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["bailian/qwen3-6-plus"]},{"id":"bailian/text-embedding-v4","object":"model","created":1735689600,"owned_by":"dashscope","canonical_slug":"text-embedding-v4","name":"Qwen Text Embedding V4","description":"Alibaba Text Embedding V4 via Dashscope. 
Latest embedding model with improved accuracy, 1024 dimensions, and 8K context support.","context_length":8192,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.000000072","completion":"0"},"top_provider":{"context_length":8192,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/embeddings"],"aliases":["text-embedding-v4"]},{"id":"deepseek/deepseek-v3.2","object":"model","created":1764547200,"owned_by":"openrouter","canonical_slug":"deepseek-v3.2","name":"DeepSeek V3.2","description":"DeepSeek's latest model building upon instruction following and coding abilities. Pre-trained on 15 trillion tokens with excellent cost-performance ratio.","context_length":128000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"deepseek","instruct_type":null},"pricing":{"prompt":"0.00000029","completion":"0.00000043","input_cache_read":"0.00000006"},"top_provider":{"context_length":128000,"max_completion_tokens":32000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["deepseek-v3.2","deepseek-v3-2-251201"]},{"id":"deepseek/deepseek-v4-flash","object":"model","created":1776988800,"owned_by":"deepseek","canonical_slug":"deepseek-v4-flash-20260424","name":"DeepSeek V4 Flash","description":"DeepSeek V4 Flash is an efficiency-optimized Mixture-of-Experts model from DeepSeek with 284B total parameters and 13B activated parameters, supporting a 1M-token context window. 
It is designed for fast inference and high-throughput workloads, while maintaining strong reasoning and coding performance.","context_length":1000000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"deepseek","instruct_type":null},"pricing":{"prompt":"0.00000014","completion":"0.00000028","input_cache_read":"0.000000028"},"top_provider":{"context_length":1000000,"max_completion_tokens":384000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["deepseek-v4-flash","deepseek-v4-flash-20260424"]},{"id":"deepseek/deepseek-v4-pro","object":"model","created":1776988800,"owned_by":"deepseek","canonical_slug":"deepseek-v4-pro-20260424","name":"DeepSeek V4 Pro","description":"DeepSeek V4 Pro is a large-scale Mixture-of-Experts model from DeepSeek with 1.6T total parameters and 49B activated parameters, supporting a 1M-token context window. 
It is designed for advanced reasoning, coding, and long-horizon agent workflows, with strong performance across knowledge, math, and software engineering benchmarks.","context_length":1000000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"deepseek","instruct_type":null},"pricing":{"prompt":"0.00000174","completion":"0.00000348","input_cache_read":"0.000000145"},"top_provider":{"context_length":1000000,"max_completion_tokens":384000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["deepseek-v4-pro","deepseek-v4-pro-20260424"]},{"id":"google/gemini-2.5-flash","object":"model","created":1750118400,"owned_by":"google","canonical_slug":"gemini-2.5-flash-preview-05-20","name":"Google: Gemini 2.5 Flash","description":"Fast and cost-effective Gemini 2.5 with 1M context. 
Features toggleable reasoning capabilities and full multimodal support at significantly lower cost than Pro.","context_length":1048576,"architecture":{"modality":"text+image+audio+file-\u003etext","input_modalities":["text","image","audio","file"],"output_modalities":["text"],"tokenizer":"gemini","instruct_type":null},"pricing":{"prompt":"0.0000003","completion":"0.0000025","audio":"0.000001","input_cache_read":"0.00000003","input_cache_write":"0.000001","input_cached_audio":"0.0000001","web_search":"0.035"},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["gemini-2.5-flash"]},{"id":"google/gemini-2.5-flash-image","object":"model","created":1759363200,"owned_by":"google","canonical_slug":"gemini-2.5-flash-image","name":"Google: Nano Banana (Gemini 2.5 Flash Image)","description":"Gemini 2.5 Flash Image, a.k.a. \"Nano Banana,\" is now generally available. It is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations. 
","context_length":32000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"gemini","instruct_type":null},"pricing":{"prompt":"0.0000003","completion":"0.0000025","output_image":"0.00003"},"top_provider":{"context_length":32000,"max_completion_tokens":32000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/images/generations"],"aliases":["gemini-2.5-flash-image"]},{"id":"google/gemini-2.5-flash-lite","object":"model","created":1753142400,"owned_by":"google","canonical_slug":"gemini-2.5-flash-lite-preview-06-17","name":"Google: Gemini 2.5 Flash Lite","description":"Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, [thinking] (i.e. 
multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the Reasoning API parameter to selectively trade off cost for intelligence.","context_length":1048576,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"gemini","instruct_type":null},"pricing":{"prompt":"0.0000001","completion":"0.0000004","audio":"0.0000003","input_cache_read":"0.000000025","input_cache_write":"0.000001","input_cached_audio":"0.0000003","web_search":"0.035"},"top_provider":{"context_length":1048576,"max_completion_tokens":65535,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["gemini-2.5-flash-lite"]},{"id":"google/gemini-2.5-pro","object":"model","created":1750118400,"owned_by":"google","canonical_slug":"gemini-2.5-pro-preview-05-06","name":"Google: Gemini 2.5 Pro","description":"Google's flagship Gemini model with 1M context, thinking/reasoning capabilities, and full multimodal support (text, image, video, audio). 
Excels at complex analysis and creative tasks.","context_length":1048576,"architecture":{"modality":"text+image+audio+file-\u003etext","input_modalities":["text","image","audio","file"],"output_modalities":["text"],"tokenizer":"gemini","instruct_type":null},"pricing":{"prompt":"0.00000125","completion":"0.00001","audio":"0.00000125","input_cache_read":"0.000000125","input_cache_write":"0.0000045","input_cached_audio":"0.000000125","web_search":"0.035"},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["gemini-2.5-pro"]},{"id":"google/gemini-3-flash-preview","object":"model","created":1765929600,"owned_by":"google","canonical_slug":"gemini-3-flash-preview","name":"Google: Gemini 3 Flash Preview","description":"Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi turn chat, and coding assistance. It delivers near Pro level reasoning and tool use performance with substantially lower latency than larger Gemini variants, making it well suited for interactive development, long running agent loops, and collaborative coding tasks. 
Compared to Gemini 2.5 Flash, it provides broad quality improvements across reasoning, multimodal understanding, and reliability.","context_length":1048576,"architecture":{"modality":"text+image+audio+file-\u003etext","input_modalities":["text","image","audio","file"],"output_modalities":["text"],"tokenizer":"gemini","instruct_type":null},"pricing":{"prompt":"0.0000005","completion":"0.000003","audio":"0.000001","input_cache_read":"0.00000005","input_cache_write":"0.000001","input_cached_audio":"0.0000001","web_search":"0.014"},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["gemini-3-flash-preview"]},{"id":"google/gemini-3-pro-image-preview","object":"model","created":1763596800,"owned_by":"google","canonical_slug":"gemini-3-pro-image-preview","name":"Google: Nano Banana Pro (Gemini 3 Pro Image Preview)","description":"Nano Banana Pro is Google’s most advanced image-generation and editing model, built on Gemini 3 Pro. It extends the original Nano Banana with significantly improved multimodal reasoning, real-world grounding, and high-fidelity visual synthesis. 
The model generates context-rich graphics, from infographics and diagrams to cinematic composites, and can incorporate real-time information via Search grounding.","context_length":64000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"gemini","instruct_type":null},"pricing":{"prompt":"0.000002","completion":"0.000012","output_image":"0.00012","web_search":"0.014"},"top_provider":{"context_length":64000,"max_completion_tokens":32000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/images/generations"],"aliases":["gemini-3-pro-image-preview"]},{"id":"google/gemini-3.1-flash-image-preview","object":"model","created":1772496000,"owned_by":"google","canonical_slug":"gemini-3.1-flash-image-preview","name":"Google: Nano Banana 2 (Gemini 3.1 Flash Image Preview)","description":"Gemini 3.1 Flash Image Preview, a.k.a. \"Nano Banana 2,\" is Google’s latest state of the art image generation and editing model, delivering Pro-level visual quality at Flash speed. 
It combines advanced contextual understanding with fast, cost-efficient inference, making complex image generation and iterative edits significantly more accessible.","context_length":64000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"gemini","instruct_type":null},"pricing":{"prompt":"0.0000005","completion":"0.000003","output_image":"0.00006","web_search":"0.014"},"top_provider":{"context_length":64000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/images/generations"],"aliases":["gemini-3.1-flash-image-preview"]},{"id":"google/gemini-3.1-flash-lite-preview","object":"model","created":1772496000,"owned_by":"google","canonical_slug":"gemini-3.1-flash-lite-preview","name":"Google: Gemini 3.1 Flash Lite Preview","description":"Gemini 3.1 Flash Lite Preview is Google's high-efficiency model optimized for high-volume use cases. It outperforms Gemini 2.5 Flash Lite on overall quality and approaches Gemini 2.5 Flash performance across key capabilities. Improvements span audio input/ASR, RAG snippet ranking, translation, data extraction, and code completion. Supports full thinking levels (minimal, low, medium, high) for fine-grained cost/performance trade-offs. 
Priced at half the cost of Gemini 3 Flash.","context_length":1000000,"architecture":{"modality":"text+image+audio+file-\u003etext","input_modalities":["text","image","audio","file"],"output_modalities":["text"],"tokenizer":"gemini","instruct_type":null},"pricing":{"prompt":"0.00000025","completion":"0.0000015","audio":"0.0000005","input_cache_read":"0.000000025","input_cache_write":"0.000001","input_cache_write_1h":"0.000001","input_cached_audio":"0.00000005","web_search":"0.014"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["gemini-3.1-flash-lite-preview"]},{"id":"google/gemini-3.1-pro-preview","object":"model","created":1771459200,"owned_by":"google","canonical_slug":"gemini-3.1-pro-preview","name":"Google: Gemini 3.1 Pro Preview","description":"Gemini 3.1 Pro is the next generation in the Gemini series of models, a suite of highly-capable, natively multimodal, reasoning models. 
Gemini 3.1 Pro is now Google’s most advanced model for complex tasks, and can comprehend vast datasets and challenging problems from different information sources, including text, audio, images, video, and entire code repositories.","context_length":1048576,"architecture":{"modality":"text+image+audio+file-\u003etext","input_modalities":["text","image","audio","file"],"output_modalities":["text"],"tokenizer":"gemini","instruct_type":null},"pricing":{"prompt":"0.000002","completion":"0.000012","audio":"0.000002","input_cache_read":"0.0000002","input_cache_write":"0.0000045","input_cached_audio":"0.0000002","web_search":"0.014"},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["gemini-3.1-pro-preview"]},{"id":"minimax/m2-her","object":"model","created":1769126400,"owned_by":"minimax","canonical_slug":"minimax-m2-her-2026-02-12","name":"MiniMax: MiniMax M2 Her","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"minimax","instruct_type":null},"pricing":{"prompt":"0.0000003","completion":"0.0000012"},"top_provider":{"context_length":200000,"max_completion_tokens":131000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["m2-her"]},{"id":"minimax/minimax-m2","object":"model","created":1761177600,"owned_by":"minimax","canonical_slug":"minimax-m2-2025-10-23","name":"MiniMax: MiniMax M2","description":"MiniMax-M2 is a 
compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.","context_length":204800,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"minimax","instruct_type":null},"pricing":{"prompt":"0.0000003","completion":"0.0000012","input_cache_read":"0.00000003","input_cache_write":"0.000000375"},"top_provider":{"context_length":204800,"max_completion_tokens":131000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["minimax-m2"]},{"id":"minimax/minimax-m2.1","object":"model","created":1766448000,"owned_by":"minimax","canonical_slug":"minimax-m2.1-2025-12-23","name":"MiniMax: MiniMax M2.1","description":"MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. 
With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.","context_length":204800,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"minimax","instruct_type":null},"pricing":{"prompt":"0.0000003","completion":"0.0000012","input_cache_read":"0.00000003","input_cache_write":"0.000000375"},"top_provider":{"context_length":204800,"max_completion_tokens":131000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["minimax-m2.1"]},{"id":"minimax/minimax-m2.1-lightning","object":"model","created":1766448000,"owned_by":"minimax","canonical_slug":"minimax-m2.1-lightning-2025-12-23","name":"MiniMax: MiniMax M2.1 Lightning","description":"MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. 
With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.","context_length":204800,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"minimax","instruct_type":null},"pricing":{"prompt":"0.0000003","completion":"0.0000024","input_cache_read":"0.00000003","input_cache_write":"0.000000375"},"top_provider":{"context_length":204800,"max_completion_tokens":131000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["minimax-m2.1-lightning"]},{"id":"minimax/minimax-m2.5","object":"model","created":1770854400,"owned_by":"minimax","canonical_slug":"minimax-m2.5-2026-02-12","name":"MiniMax: MiniMax M2.5","description":"MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1 to extend into general office work, reaching fluency in generating and operating Word, Excel, and PowerPoint files, context switching between diverse software environments, and working across different agent and human teams. 
Scoring 80.2% on SWE-Bench Verified, 51.3% on Multi-SWE-Bench, and 76.3% on BrowseComp, M2.5 is also more token efficient than previous generations, having been trained to optimize its actions and output through planning.","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"minimax","instruct_type":null},"pricing":{"prompt":"0.0000003","completion":"0.0000012","input_cache_read":"0.00000003","input_cache_write":"0.000000375"},"top_provider":{"context_length":200000,"max_completion_tokens":131000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["minimax-m2.5"]},{"id":"minimax/minimax-m2.5-lightning","object":"model","created":1770854400,"owned_by":"minimax","canonical_slug":"minimax-m2.5-lightning-2026-02-12","name":"MiniMax: MiniMax M2.5 Lightning","description":"MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1 to extend into general office work, reaching fluency in generating and operating Word, Excel, and PowerPoint files, context switching between diverse software environments, and working across different agent and human teams. 
Scoring 80.2% on SWE-Bench Verified, 51.3% on Multi-SWE-Bench, and 76.3% on BrowseComp, M2.5 is also more token efficient than previous generations, having been trained to optimize its actions and output through planning.","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"minimax","instruct_type":null},"pricing":{"prompt":"0.0000003","completion":"0.0000024","input_cache_read":"0.00000003","input_cache_write":"0.000000375"},"top_provider":{"context_length":200000,"max_completion_tokens":131000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["minimax-m2.5-lightning"]},{"id":"minimax/minimax-m2.7","object":"model","created":1773792000,"owned_by":"minimax","canonical_slug":"minimax-m2.7-2026-03-18","name":"MiniMax: MiniMax M2.7","description":"M2.7 delivers outstanding performance in real-world software engineering, including end-to-end complete project delivery, log analysis and bug triaging, code security, machine learning, and more. On the benchmark SWE-Pro, M2.7 scores 56.22%, nearly matching the level of Opus. 
This capability also extends to end-to-end complete project delivery scenarios (VIBE-Pro 55.6%) and deep understanding of complex engineering systems on Terminal Bench 2 (57.0%).","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"minimax","instruct_type":null},"pricing":{"prompt":"0.0000003","completion":"0.0000012","input_cache_read":"0.00000006","input_cache_write":"0.000000375"},"top_provider":{"context_length":200000,"max_completion_tokens":131000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["minimax-m2.7"]},{"id":"minimax/minimax-m2.7-highspeed","object":"model","created":1773792000,"owned_by":"minimax","canonical_slug":"minimax-m2.7-highspeed-2026-03-18","name":"MiniMax: MiniMax M2.7 Highspeed","description":"M2.7 highspeed: Same performance, faster, more agile","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"minimax","instruct_type":null},"pricing":{"prompt":"0.0000006","completion":"0.0000024","input_cache_read":"0.00000006","input_cache_write":"0.000000375"},"top_provider":{"context_length":200000,"max_completion_tokens":131000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["minimax-m2.7-highspeed"]},{"id":"moonshotai/kimi-k2.5","object":"model","created":1769472000,"owned_by":"MoonshotAI","canonical_slug":"kimi-k2.5-20260127","name":"MoonshotAI: Kimi 
K2.5","description":"Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.","context_length":262144,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"kimi","instruct_type":null},"pricing":{"prompt":"0.0000006","completion":"0.000003","input_cache_read":"0.0000001","web_search":"0.0043"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["kimi-k2.5"]},{"id":"moonshotai/kimi-k2.6","object":"model","created":1776729600,"owned_by":"MoonshotAI","canonical_slug":"kimi-k2.6-20260421","name":"MoonshotAI: Kimi K2.6","description":"Kimi K2.6 是 Kimi 最新最智能的模型，Kimi K2.6 的通用 Agent、代码、视觉理解等综合能力得到全面提升，其中在博士级难度的完整版人类最后的考试（Humanity’s Last Exam）、在考察模型真实软件工程能力的 SWE-Bench Pro、评估 Agent 深度检索能力的 DeepSearchQA 等基准测试中均取得行业领先的成绩，同时支持文本、图片与视频输入，思考与非思考模式，对话与 Agent 
任务。","context_length":262144,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"kimi","instruct_type":null},"pricing":{"prompt":"0.00000095","completion":"0.000004","input_cache_read":"0.00000016","web_search":"0.0043"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["kimi-k2.6","kimi-k2.6-20260421"]},{"id":"openai/gpt-4.1","object":"model","created":1744588800,"owned_by":"azure","canonical_slug":"gpt-4.1-2025-04-14","name":"GPT-4.1","description":"OpenAI's flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. Features 1M context window for complex document analysis and code generation.","context_length":1047576,"architecture":{"modality":"text+image+audio-\u003etext","input_modalities":["text","image","audio"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.000002","completion":"0.000008","input_cache_read":"0.0000005","web_search":"0.01"},"top_provider":{"context_length":1047576,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-4.1"]},{"id":"openai/gpt-4.1-mini","object":"model","created":1744588800,"owned_by":"azure","canonical_slug":"gpt-4.1-mini-2025-04-14","name":"GPT-4.1 Mini","description":"Mid-sized GPT-4.1 variant with GPT-4o performance at lower latency and 
cost. Features 1M context window, supports structured outputs and vision understanding at reduced pricing.","context_length":1047576,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.0000004","completion":"0.0000016","input_cache_read":"0.0000001","web_search":"0.01"},"top_provider":{"context_length":1047576,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-4.1-mini"]},{"id":"openai/gpt-4o","object":"model","created":1715558400,"owned_by":"azure","canonical_slug":"gpt-4o-2024-11-20","name":"GPT-4o","description":"OpenAI's flagship multimodal model supporting text and image inputs with text outputs. Maintains GPT-4 Turbo intelligence at 2x speed and 50% lower cost. 
Optimized for complex reasoning, coding, and visual understanding tasks.","context_length":128000,"architecture":{"modality":"text+image+audio-\u003etext","input_modalities":["text","image","audio"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.0000025","completion":"0.00001","input_cache_read":"0.00000125","web_search":"0.01"},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-4o"]},{"id":"openai/gpt-4o-mini","object":"model","created":1721260800,"owned_by":"azure","canonical_slug":"gpt-4o-mini-2024-07-18","name":"GPT-4o Mini","description":"OpenAI's advanced small model, 60% cheaper than GPT-3.5 Turbo while scoring 82% on MMLU. 
Optimized for cost-effective tasks requiring strong language understanding and generation capabilities.","context_length":128000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00000015","completion":"0.0000006","input_cache_read":"0.000000075","web_search":"0.01"},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-4o-mini"]},{"id":"openai/gpt-4o-mini-transcribe","object":"model","created":1765756800,"owned_by":"azure","canonical_slug":"gpt-4o-mini-transcribe-20251215","name":"OpenAI: GPT 4o Mini Transcribe","description":"GPT-4o Mini Transcribe is OpenAI's smaller, cost-efficient speech-to-text model built on GPT-4o Mini audio capabilities. 
It's priced per token (input and output), making it suitable for high-volume transcription workflows that benefit from token-level billing transparency at a lower cost point.","context_length":128000,"architecture":{"modality":"text+audio-\u003etext","input_modalities":["text","audio"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00000125","completion":"0.000005","audio":"0.00000125"},"top_provider":{"context_length":128000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/audio/transcriptions"],"aliases":["gpt-4o-mini-transcribe","gpt-4o-mini-transcribe-20251215"]},{"id":"openai/gpt-4o-transcribe-diarize","object":"model","created":1760486400,"owned_by":"azure","canonical_slug":"gpt-4o-transcribe-20250320","name":"OpenAI: GPT 4o Transcribe Diarize","description":"GPT-4o Transcribe is OpenAI's high-quality speech-to-text model built on GPT-4o audio capabilities. 
It's priced per token (input and output), making it suitable for workflows that benefit from token-level billing transparency.","context_length":128000,"architecture":{"modality":"text+audio-\u003etext","input_modalities":["text","audio"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.0000025","completion":"0.00001","audio":"0.0000025"},"top_provider":{"context_length":128000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/audio/transcriptions"],"aliases":["gpt-4o-transcribe-diarize","gpt-4o-transcribe-20250320"]},{"id":"openai/gpt-5","object":"model","created":1754524800,"owned_by":"azure","canonical_slug":"gpt-5-2025-12-15","name":"GPT-5","description":"OpenAI's next-generation flagship model with 256K context window and advanced multimodal capabilities. 
Features enhanced reasoning, agentic workflows, and state-of-the-art performance across all benchmarks.","context_length":256000,"architecture":{"modality":"text+image+audio-\u003etext","input_modalities":["text","image","audio"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00000125","completion":"0.00001","input_cache_read":"0.00000013","web_search":"0.01"},"top_provider":{"context_length":256000,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-5"]},{"id":"openai/gpt-5-mini","object":"model","created":1754524800,"owned_by":"azure","canonical_slug":"gpt-5-mini-2025-12-15","name":"GPT-5 Mini","description":"Cost-effective GPT-5 variant with 256K context. 
Balanced performance for everyday tasks while maintaining strong reasoning and vision capabilities at reduced cost.","context_length":256000,"architecture":{"modality":"text+image+audio-\u003etext","input_modalities":["text","image","audio"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00000025","completion":"0.000002","input_cache_read":"0.00000003","web_search":"0.01"},"top_provider":{"context_length":256000,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-5-mini"]},{"id":"openai/gpt-5-nano","object":"model","created":1754524800,"owned_by":"azure","canonical_slug":"gpt-5-nano-2025-12-15","name":"GPT-5 Nano","description":"Ultra-efficient GPT-5 variant optimized for maximum throughput at minimal cost. 
Features 128K context and low-latency design for high-volume automation tasks.","context_length":128000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00000005","completion":"0.0000004","input_cache_read":"0.00000001","web_search":"0.01"},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-5-nano"]},{"id":"openai/gpt-5.1","object":"model","created":1762992000,"owned_by":"azure","canonical_slug":"gpt-5.1-2026-01-08","name":"OpenAI: GPT-5.1","description":"OpenAI's enhanced GPT-5.1 model with improved reasoning and multimodal capabilities. Features 256K context with better instruction following and coding performance.","context_length":256000,"architecture":{"modality":"text+image+audio-\u003etext","input_modalities":["text","image","audio"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00000125","completion":"0.00001","input_cache_read":"0.00000013","web_search":"0.01"},"top_provider":{"context_length":256000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-5.1"]},{"id":"openai/gpt-5.1-codex-max","object":"model","created":1764806400,"owned_by":"azure","canonical_slug":"gpt-5.1-codex-max-2025-12-04","name":"OpenAI: GPT-5.1 Codex Max","description":"GPT-5.1-Codex-Max is OpenAI’s latest agentic 
coding model, designed for long-running, high-context software development tasks. It is based on an updated version of the 5.1 reasoning stack and trained on agentic workflows spanning software engineering, mathematics, and research.","context_length":256000,"architecture":{"modality":"text+image+audio-\u003etext","input_modalities":["text","image","audio"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00000125","completion":"0.00001","input_cache_read":"0.00000013","web_search":"0.01"},"top_provider":{"context_length":256000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-5.1-codex-max"]},{"id":"openai/gpt-5.1-codex-mini","object":"model","created":1762992000,"owned_by":"azure","canonical_slug":"gpt-5.1-codex-mini-2025-11-13","name":"GPT-5.1 Codex Mini","description":"GPT-5.1-Codex-Mini is a smaller and faster version of 
GPT-5.1-Codex","context_length":256000,"architecture":{"modality":"text+image+audio-\u003etext","input_modalities":["text","image","audio"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00000025","completion":"0.000002","input_cache_read":"0.00000003","web_search":"0.01"},"top_provider":{"context_length":256000,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-5.1-codex-mini"]},{"id":"openai/gpt-5.2","object":"model","created":1765411200,"owned_by":"azure","canonical_slug":"gpt-5.2-2026-01-10","name":"OpenAI: GPT-5.2","description":"OpenAI's latest GPT-5.2 model with 512K context window. Most advanced capabilities including enhanced reasoning, full multimodal support, and web search integration.","context_length":512000,"architecture":{"modality":"text+image+audio-\u003etext","input_modalities":["text","image","audio"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00000175","completion":"0.000014","input_cache_read":"0.00000018","web_search":"0.01"},"top_provider":{"context_length":512000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-5.2"]},{"id":"openai/gpt-5.2-codex","object":"model","created":1768348800,"owned_by":"azure","canonical_slug":"gpt-5.2-codex-2026-01-14","name":"OpenAI: GPT-5.2 Codex","description":"GPT-5.2-Codex is an upgraded version of GPT-5.1-Codex optimized for 
software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1-Codex, 5.2-Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the reasoning.effort parameter.","context_length":512000,"architecture":{"modality":"text+image+audio-\u003etext","input_modalities":["text","image","audio"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00000175","completion":"0.000014","input_cache_read":"0.00000018","web_search":"0.01"},"top_provider":{"context_length":512000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-5.2-codex"]},{"id":"openai/gpt-5.3-chat","object":"model","created":1772496000,"owned_by":"azure","canonical_slug":"gpt-5.3-chat-2026-03-03","name":"OpenAI: GPT-5.3 Chat","description":"GPT-5.3 Chat is an update to ChatGPT's most-used model that makes everyday conversations smoother, more useful, and more directly helpful. 
It delivers more accurate answers with better contextualization and significantly reduces unnecessary refusals, caveats, and overly cautious phrasing that can interrupt conversational flow.","context_length":128000,"architecture":{"modality":"text+image+audio-\u003etext","input_modalities":["text","image","audio"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00000175","completion":"0.000014","input_cache_read":"0.00000018","web_search":"0.01"},"top_provider":{"context_length":128000,"max_completion_tokens":16000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-5.3-chat"]},{"id":"openai/gpt-5.3-codex","object":"model","created":1771977600,"owned_by":"azure","canonical_slug":"gpt-5.3-codex-2026-02-06","name":"OpenAI: GPT-5.3 Codex","description":"GPT-5.3-Codex is OpenAI’s most advanced agentic coding model, combining the frontier software engineering performance of GPT-5.2-Codex with the broader reasoning and professional knowledge capabilities of GPT-5.2. 
It achieves state-of-the-art results on SWE-Bench Pro and strong performance on Terminal-Bench 2.0 and OSWorld-Verified, reflecting improved multi-language coding, terminal proficiency, and real-world computer-use skills.","context_length":512000,"architecture":{"modality":"text+image+audio-\u003etext","input_modalities":["text","image","audio"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00000175","completion":"0.000014","input_cache_read":"0.00000018","web_search":"0.01"},"top_provider":{"context_length":512000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/responses"],"aliases":["gpt-5.3-codex"]},{"id":"openai/gpt-5.4","object":"model","created":1772668800,"owned_by":"azure","canonical_slug":"gpt-5.4-2026-03-05","name":"OpenAI: GPT-5.4","description":"GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling high-context reasoning, coding, and multimodal analysis within the same workflow. The model delivers improved performance in coding, document understanding, tool use, and instruction following. 
It is designed as a strong default for both general-purpose tasks and software engineering, capable of generating production-quality code, synthesizing information across multiple sources, and executing complex multi-step workflows with fewer iterations and greater token efficiency.","context_length":1050000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.0000025","completion":"0.000015","input_cache_read":"0.00000025","web_search":"0.01"},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-5.4"]},{"id":"openai/gpt-5.4-mini","object":"model","created":1773705600,"owned_by":"azure","canonical_slug":"gpt-5.4-mini-2026-03-17","name":"OpenAI: GPT-5.4 Mini","description":"GPT-5.4 mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. 
It supports text and image inputs with strong performance across reasoning, coding, and tool use, while reducing latency and cost for large-scale deployments.","context_length":400000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00000075","completion":"0.0000045","input_cache_read":"0.000000075","web_search":"0.01"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-5.4-mini"]},{"id":"openai/gpt-5.4-nano","object":"model","created":1773705600,"owned_by":"azure","canonical_slug":"gpt-5.4-nano-2026-03-17","name":"OpenAI: GPT-5.4 Nano","description":"GPT-5.4 nano is the most lightweight and cost-efficient variant of the GPT-5.4 family, optimized for speed-critical and high-volume tasks. 
It supports text and image inputs and is designed for low-latency use cases such as classification, data extraction, ranking, and sub-agent execution.","context_length":400000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.0000002","completion":"0.00000125","input_cache_read":"0.00000002","web_search":"0.01"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-5.4-nano"]},{"id":"openai/gpt-5.4-pro","object":"model","created":1772668800,"owned_by":"azure","canonical_slug":"gpt-5.4-pro-2026-03-05","name":"OpenAI: GPT-5.4 Pro","description":"GPT-5.4 Pro is OpenAI's most advanced model, building on GPT-5.4's unified architecture with enhanced reasoning capabilities for complex, high-stakes tasks. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs. 
Optimized for step-by-step reasoning, instruction following, and accuracy, GPT-5.4 Pro excels at agentic coding, long-context workflows, and multi-step problem solving.","context_length":1050000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00003","completion":"0.00018","web_search":"0.01"},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/responses"],"aliases":["gpt-5.4-pro"]},{"id":"openai/gpt-5.5","object":"model","created":1776988800,"owned_by":"azure","canonical_slug":"gpt-5.5-2026-04-25","name":"OpenAI: GPT-5.5","description":"GPT-5.5 is OpenAI’s frontier model designed for complex professional workloads, building on GPT-5.4 with stronger reasoning, higher reliability, and improved token efficiency on hard tasks. 
It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling large-scale reasoning, coding, and multimodal workflows within a single system.","context_length":1050000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.000005","completion":"0.00003","input_cache_read":"0.0000005","web_search":"0.01"},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["gpt-5.5","gpt-5.5-2026-04-24"]},{"id":"openai/gpt-image-1.5","object":"model","created":1765843200,"owned_by":"azure","canonical_slug":"gpt-image-1.5-2025-12-16","name":"OpenAI: GPT Image 1.5","description":"GPT Image 1.5 is our latest image generation model, with better instruction following and adherence to 
prompts.","context_length":0,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.000005","completion":"0.00001","image":"0.000008","input_cache_read":"0.00000125","input_cached_image":"0.000002","output_image":"0.000032"},"top_provider":{"context_length":0,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/images/generations"],"aliases":["gpt-image-1.5","gpt-image-1.5-2025-12-16","gpt-image-1.5-20251216"]},{"id":"openai/gpt-image-2","object":"model","created":1776729600,"owned_by":"azure","canonical_slug":"gpt-image-2-2026-04-21","name":"OpenAI: GPT Image 2","description":"GPT-image-2 is OpenAI's latest cutting-edge image generation model. Key value adds include better performance, quality, editing controls, and face preservation.\r\nThe model supports high input_fidelity and adding/removing one aspect of the image while retaining others. 
This model includes improvements in aspect ratio, resolution, and editing capabilities.","context_length":0,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.000005","completion":"0.00003","image":"0.000008","input_cache_read":"0.00000125","input_cached_image":"0.000002","output_image":"0.00003"},"top_provider":{"context_length":0,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/images/generations"],"aliases":["gpt-image-2","gpt-image-2-2026-04-21","gpt-image-2-20260421"]},{"id":"openai/text-embedding-3-large","object":"model","created":1761782400,"owned_by":"azure","canonical_slug":"text-embedding-3-large","name":"OpenAI: Text Embedding 3 Large","description":"text-embedding-3-large is OpenAI's most capable embedding model for both English and non-English tasks. Embeddings are a numerical representation of text that can be used to measure the relatedness between two pieces of text. 
Embeddings are useful for search, clustering, recommendations, anomaly detection, and classification tasks.","context_length":8200,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00000013","completion":"0"},"top_provider":{"context_length":8200,"max_completion_tokens":8200,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/embeddings"],"aliases":["text-embedding-3-large"]},{"id":"openai/text-embedding-3-small","object":"model","created":1761782400,"owned_by":"azure","canonical_slug":"text-embedding-3-small","name":"OpenAI: Text Embedding 3 Small","description":"text-embedding-3-small is OpenAI's improved, more performant version of the ada embedding model. Embeddings are a numerical representation of text that can be used to measure the relatedness between two pieces of text. Embeddings are useful for search, clustering, recommendations, anomaly detection, and classification tasks.","context_length":8200,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00000002","completion":"0"},"top_provider":{"context_length":8200,"max_completion_tokens":8200,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/embeddings"],"aliases":["text-embedding-3-small"]},{"id":"volcengine/doubao-seed-1-6","object":"model","created":1760486400,"owned_by":"volcengine","canonical_slug":"doubao-seed-1-6-251015","name":"Doubao Seed 1.6","description":"ByteDance Doubao Seed 1.6 via Volcengine. 
Latest generation model with 256K context, enhanced reasoning, and improved capabilities.","context_length":256000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"doubao","instruct_type":null},"pricing":{"prompt":"0.00000012","completion":"0.00000029","input_cache_read":"0.000000023"},"top_provider":{"context_length":256000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["doubao-seed-1-6","doubao-seed-1-6-251015"]},{"id":"volcengine/doubao-seed-1-6-flash","object":"model","created":1756339200,"owned_by":"volcengine","canonical_slug":"doubao-seed-1-6-flash-250828","name":"Doubao Seed 1.6 Flash","description":"ByteDance Doubao Seed 1.6 Flash via Volcengine. 
Fast inference variant optimized for high throughput and low latency applications.","context_length":256000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"doubao","instruct_type":null},"pricing":{"prompt":"0.00000003","completion":"0.00000022","input_cache_read":"0.0000000043"},"top_provider":{"context_length":256000,"max_completion_tokens":32000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["doubao-seed-1-6-flash","doubao-seed-1-6-flash-250828"]},{"id":"volcengine/doubao-seed-1-6-vision","object":"model","created":1755216000,"owned_by":"volcengine","canonical_slug":"doubao-seed-1-6-vision-250815","name":"Doubao Seed 1.6 Vision","description":"ByteDance Doubao Seed 1.6 Vision via Volcengine. 
Multimodal model with strong image understanding and visual reasoning capabilities.","context_length":256000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"doubao","instruct_type":null},"pricing":{"prompt":"0.00000012","completion":"0.00000115","input_cache_read":"0.000000023"},"top_provider":{"context_length":256000,"max_completion_tokens":32000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["doubao-seed-1-6-vision","doubao-seed-1-6-vision-250815"]},{"id":"volcengine/doubao-seed-1-8","object":"model","created":1766880000,"owned_by":"volcengine","canonical_slug":"doubao-seed-1-8-251228","name":"Doubao Seed 1.8","description":"ByteDance Doubao Seed 1.8 via Volcengine. 
Next generation model with improved reasoning, enhanced capabilities, and better performance.","context_length":256000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"doubao","instruct_type":null},"pricing":{"prompt":"0.00000012","completion":"0.00000029","input_cache_read":"0.000000023"},"top_provider":{"context_length":256000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["doubao-seed-1-8","doubao-seed-1-8-251228"]},{"id":"volcengine/doubao-seed-2.0-code","object":"model","created":1771027200,"owned_by":"volcengine","canonical_slug":"doubao-seed-2-0-code-preview-260215","name":"Doubao Seed 2.0 Code","description":"Doubao-Seed-2.0-Code 面向企业级编程需求优化，在 Seed 2.0 优秀的 Agent、VLM 能力基础上，特别增强了代码能力，不仅前端能力表现出众，也对企业常见的多语言编码需求做了特别优化，适合接入各种 AI 
编程工具使用。","context_length":256000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"doubao","instruct_type":null},"pricing":{"prompt":"0.00000067","completion":"0.00000336","input_cache_read":"0.00000014","input_cache_write":"0.0000000024"},"top_provider":{"context_length":256000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["doubao-seed-2.0-code","doubao-seed-2-0-code-preview-260215"]},{"id":"volcengine/doubao-seed-2.0-lite","object":"model","created":1771027200,"owned_by":"volcengine","canonical_slug":"doubao-seed-2-0-lite-260215","name":"Doubao Seed 2.0 Lite","description":"Doubao-Seed-2.0-lite 是面向高频企业场景兼顾性能与成本的均衡型模型，综合能力超越上一代Doubao-Seed-1.8。胜任非结构化信息处理、内容创作、搜索推荐、数据分析等生产型工作，支持长上下文、多源信息融合、多步指令执行与高保真结构化输出。在保障稳定效果的同时显著优化成本。","context_length":256000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"doubao","instruct_type":null},"pricing":{"prompt":"0.00000013","completion":"0.00000076","input_cache_read":"0.00000003","input_cache_write":"0.0000000024"},"top_provider":{"context_length":256000,"max_completion_tokens":32000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["doubao-seed-2.0-lite","doubao-seed-2-0-lite-260215"]},{"id":"volcengine/doubao-seed-2.0-mini","object":"model","created":1771027200,"owned_by":"volcengine","canonical_slug":"doubao-seed-2-0-mini-260
215","name":"Doubao Seed 2.0 Mini","description":"Doubao-Seed-2.0-mini 面向低时延、高并发与成本敏感场景，强调快速响应与灵活推理部署。模型效果与Doubao-Seed-1.6相当。支持256k上下文、4档思考长度和多模态理解，适合成本和速度优先的轻量级任务。","context_length":256000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"doubao","instruct_type":null},"pricing":{"prompt":"0.00000006","completion":"0.00000056","input_cache_read":"0.00000002","input_cache_write":"0.0000000024"},"top_provider":{"context_length":256000,"max_completion_tokens":32000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["doubao-seed-2.0-mini","doubao-seed-2-0-mini-260215"]},{"id":"volcengine/doubao-seed-2.0-pro","object":"model","created":1771027200,"owned_by":"volcengine","canonical_slug":"doubao-seed-2-0-pro-260215","name":"Doubao Seed 2.0 Pro","description":"Doubao-Seed-2.0-pro是旗舰级全能通用模型，面向 Agent 
时代的复杂推理与长链路任务执行场景。强调多模态理解、长上下文推理、结构化生成与工具增强执行。复杂指令与多约束执行能力突出，可稳定应对多步复杂规划、复杂图文推理、视频内容理解与高难度分析等场景。","context_length":256000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"doubao","instruct_type":null},"pricing":{"prompt":"0.00000067","completion":"0.00000336","input_cache_read":"0.00000014","input_cache_write":"0.0000000024"},"top_provider":{"context_length":256000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions","/v1/responses"],"aliases":["doubao-seed-2.0-pro","doubao-seed-2-0-pro-260215"]},{"id":"volcengine/doubao-seedream-4.5","object":"model","created":1764288000,"owned_by":"volcengine","canonical_slug":"doubao-seedream-4-5-251128","name":"Doubao Seedream 4.5","description":"Seedream 4.5 是字节跳动最新推出的图像多模态模型，整合了文生图、图生图、组图输出等能力，融合常识和推理能力。相比前代4.0模型生成效果大幅提升，具备更好的编辑一致性和多图融合效果，能更精准的控制画面细节，小字、小人脸生成更自然，图片排版、色彩更和谐，美感提升","context_length":100000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"doubao","instruct_type":null},"pricing":{"prompt":"0","completion":"0","output_image_per_num":"0.04"},"top_provider":{"context_length":100000,"max_completion_tokens":100000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/images/generations"],"aliases":["doubao-seedream-4.5","doubao-seedream-4-5-251128"]},{"id":"volcengine/doubao-seedream-5.0-lite","object":"model","created":1769558400,"owned_by":"volcengine","canonical_slug":"doubao-seedream-5-0-260128","name":"Doubao Seedream 5.0 
Lite","description":"Doubao-Seedream-5.0-lite是字节跳动发布的最新图像创作模型。该模型首次搭载联网检索功能，能融合实时网络信息，提升生图时效性。同时，模型的聪明度进一步升级，能够精准解析复杂指令和视觉内容。此外，模型在世界知识广度、参考一致性及专业场景生成质量上均有增强，可更好地满足企业级视觉创作需求。","context_length":100000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"doubao","instruct_type":null},"pricing":{"prompt":"0","completion":"0","output_image_per_num":"0.035"},"top_provider":{"context_length":100000,"max_completion_tokens":100000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/images/generations"],"aliases":["doubao-seedream-5.0-lite","doubao-seedream-5-0-260128"]},{"id":"x-ai/grok-4.1-fast","object":"model","created":1763510400,"owned_by":"xai","canonical_slug":"grok-4-1-fast-non-reasoning","name":"xAI: Grok 4.1 Fast","description":"xAI Grok 4.1 Fast - latest version with improved tool calling and 2M context. 
Best agentic performance from xAI without explicit reasoning.","context_length":2000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"grok","instruct_type":null},"pricing":{"prompt":"0.0000002","completion":"0.0000005","input_cache_read":"0.00000005"},"top_provider":{"context_length":2000000,"max_completion_tokens":30000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"]},{"id":"x-ai/grok-4.20","object":"model","created":1774915200,"owned_by":"xai","canonical_slug":"grok-4-20-non-reasoning","name":"xAI: Grok 4.20","description":"Grok 4.20 is xAI's newest flagship model with industry-leading speed and agentic tool calling capabilities. It combines the lowest hallucination rate on the market with strict prompt adherence, delivering consistently precise and truthful responses.","context_length":2000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"grok","instruct_type":null},"pricing":{"prompt":"0.000004","completion":"0.000012","input_cache_read":"0.0000004"},"top_provider":{"context_length":2000000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"]},{"id":"z-ai/glm-4.6","object":"model","created":1759190400,"owned_by":"zhipu","canonical_slug":"glm-4.6","name":"Z.ai: GLM-4.6","description":"Zhipu GLM-4.6 with 200K context and strong instruction following. 
Excellent balance of performance and cost for Chinese language tasks.","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"glm","instruct_type":null},"pricing":{"prompt":"0.0000004","completion":"0.0000019","input_cache_read":"0.00000011","web_search":"0.01"},"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["glm-4.6"]},{"id":"z-ai/glm-4.7","object":"model","created":1766448000,"owned_by":"zhipu","canonical_slug":"glm-4.7","name":"Z.ai: GLM 4.7","description":"Zhipu GLM-4.7 - latest flagship model with advanced reasoning, 200K context, and web search capabilities. Best-in-class Chinese language understanding.","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"glm","instruct_type":null},"pricing":{"prompt":"0.0000004","completion":"0.000002","input_cache_read":"0.00000008","web_search":"0.01"},"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["glm-4.7"]},{"id":"z-ai/glm-4.7-flash:free","object":"model","created":1768780800,"owned_by":"zhipu","canonical_slug":"glm-4.7-flash","name":"Z.ai: GLM-4.7-Flash (Free)","description":"As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. 
It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"glm","instruct_type":null},"pricing":{"prompt":"0","completion":"0","web_search":"0.01"},"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["glm-4.7-flash:free"]},{"id":"z-ai/glm-4.7-flashx","object":"model","created":1768780800,"owned_by":"zhipu","canonical_slug":"glm-4.7-flashx","name":"Z.ai: GLM-4.7 FlashX","description":"As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. 
It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"glm","instruct_type":null},"pricing":{"prompt":"0.000000072","completion":"0.00000043","input_cache_read":"0.000000015","web_search":"0.01"},"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["glm-4.7-flashx"]},{"id":"z-ai/glm-5","object":"model","created":1770768000,"owned_by":"zhipu","canonical_slug":"glm-5","name":"Z.ai: GLM-5","description":"GLM-5 is Z.ai’s flagship open-source foundation model engineered for complex systems design and long-horizon agent workflows. Built for expert developers, it delivers production-grade performance on large-scale programming tasks, rivaling leading closed-source models. 
With advanced agentic planning, deep backend reasoning, and iterative self-correction, GLM-5 moves beyond code generation to full-system construction and autonomous execution.","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"glm","instruct_type":null},"pricing":{"prompt":"0.000001","completion":"0.0000032","input_cache_read":"0.0000002","web_search":"0.01"},"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["glm-5"]},{"id":"z-ai/glm-5-turbo","object":"model","created":1773619200,"owned_by":"zhipu","canonical_slug":"glm-5-turbo","name":"Z.ai: GLM-5-Turbo","description":"GLM-5-Turbo is a foundation model deeply optimized for the OpenClaw scenario. 
It has been specifically optimized for the core requirements of OpenClaw tasks since the training phase, enhancing key capabilities such as tool invocation, command following, timed and persistent tasks, and long-chain execution.","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"glm","instruct_type":null},"pricing":{"prompt":"0.0000012","completion":"0.000004","input_cache_read":"0.00000024","web_search":"0.01"},"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["glm-5-turbo"]},{"id":"z-ai/glm-5.1","object":"model","created":1774569600,"owned_by":"zhipu","canonical_slug":"glm-5.1","name":"Z.ai: GLM 5.1","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"glm","instruct_type":null},"pricing":{"prompt":"0.0000014","completion":"0.0000044","input_cache_read":"0.00000026","web_search":"0.01"},"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["glm-5.1"]},{"id":"z-ai/glm-5v-turbo","object":"model","created":1775001600,"owned_by":"zhipu","canonical_slug":"glm-5v-turbo","name":"GLM-5V-Turbo","description":"GLM-5V-Turbo is Z.AI’s first multimodal coding foundation model, built for vision-based coding tasks. 
It can natively process multimodal inputs such as images, video, and text, while also excelling at long-horizon planning, complex coding, and action execution. ","context_length":200000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"glm","instruct_type":null},"pricing":{"prompt":"0.0000012","completion":"0.000004","input_cache_read":"0.00000024","web_search":"0.01"},"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"default_parameters":null,"expiration_date":null,"is_deprecated":false,"supported_endpoints":["/v1/chat/completions"],"aliases":["glm-5v-turbo"]}]}
