From 502e2d0387d575f84901bfabced4946e26294c75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hartmut=20N=C3=B6renberg?=
Date: Sun, 15 Mar 2026 14:20:40 +0100
Subject: [PATCH] fix: use max_completion_tokens instead of max_tokens for
 GPT-4o

Azure OpenAI GPT-4o and newer models require 'max_completion_tokens'
instead of 'max_tokens'. Fixed in all 3 services:
- chat_service.py (2 call sites)
- azure_ai.py (validation service)
- tenants/router.py (test connection endpoint)

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 backend/app/domains/tenants/router.py | 2 +-
 backend/app/services/azure_ai.py      | 2 +-
 backend/app/services/chat_service.py  | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/backend/app/domains/tenants/router.py b/backend/app/domains/tenants/router.py
index 75f755e..2ae6daf 100644
--- a/backend/app/domains/tenants/router.py
+++ b/backend/app/domains/tenants/router.py
@@ -193,7 +193,7 @@ async def test_tenant_ai_config(
         client.chat.completions.create(
             model=deployment,
             messages=[{"role": "user", "content": "ping"}],
-            max_tokens=5,
+            max_completion_tokens=5,
         )
         return {"ok": True}
     except Exception as exc:
diff --git a/backend/app/services/azure_ai.py b/backend/app/services/azure_ai.py
index 657608a..7a9f4bd 100644
--- a/backend/app/services/azure_ai.py
+++ b/backend/app/services/azure_ai.py
@@ -124,7 +124,7 @@ def _call_azure_vision(
                 ],
             }
         ],
-        max_tokens=max_tokens,
+        max_completion_tokens=max_tokens,
         temperature=temperature,
     )
 
diff --git a/backend/app/services/chat_service.py b/backend/app/services/chat_service.py
index 42dde14..6aa33af 100644
--- a/backend/app/services/chat_service.py
+++ b/backend/app/services/chat_service.py
@@ -701,7 +701,7 @@ async def chat_with_agent(
             messages=messages,
             tools=TOOLS,
             tool_choice="auto",
-            max_tokens=creds["max_tokens"],
+            max_completion_tokens=creds["max_tokens"],
             temperature=creds["temperature"],
         )
         if response.usage:
@@ -744,7 +744,7 @@ async def chat_with_agent(
             messages=messages,
             tools=TOOLS,
             tool_choice="auto",
-            max_tokens=creds["max_tokens"],
+            max_completion_tokens=creds["max_tokens"],
             temperature=creds["temperature"],
         )
         if response.usage: