fix: use max_completion_tokens instead of max_tokens for GPT-4o

Azure OpenAI GPT-4o and newer models require the 'max_completion_tokens'
parameter instead of 'max_tokens'. Updated all 4 call sites across 3 files:
- chat_service.py (2 call sites)
- azure_ai.py (validation service)
- tenants/router.py (test connection endpoint)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-15 14:20:40 +01:00
parent 59ce61098c
commit 502e2d0387
3 changed files with 4 additions and 4 deletions
+1 -1
View File
@@ -193,7 +193,7 @@ async def test_tenant_ai_config(
client.chat.completions.create( client.chat.completions.create(
model=deployment, model=deployment,
messages=[{"role": "user", "content": "ping"}], messages=[{"role": "user", "content": "ping"}],
max_tokens=5, max_completion_tokens=5,
) )
return {"ok": True} return {"ok": True}
except Exception as exc: except Exception as exc:
+1 -1
View File
@@ -124,7 +124,7 @@ def _call_azure_vision(
], ],
} }
], ],
max_tokens=max_tokens, max_completion_tokens=max_tokens,
temperature=temperature, temperature=temperature,
) )
+2 -2
View File
@@ -701,7 +701,7 @@ async def chat_with_agent(
messages=messages, messages=messages,
tools=TOOLS, tools=TOOLS,
tool_choice="auto", tool_choice="auto",
max_tokens=creds["max_tokens"], max_completion_tokens=creds["max_tokens"],
temperature=creds["temperature"], temperature=creds["temperature"],
) )
if response.usage: if response.usage:
@@ -744,7 +744,7 @@ async def chat_with_agent(
messages=messages, messages=messages,
tools=TOOLS, tools=TOOLS,
tool_choice="auto", tool_choice="auto",
max_tokens=creds["max_tokens"], max_completion_tokens=creds["max_tokens"],
temperature=creds["temperature"], temperature=creds["temperature"],
) )
if response.usage: if response.usage: