From 502e2d0387d575f84901bfabced4946e26294c75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hartmut=20N=C3=B6renberg?=
Date: Sun, 15 Mar 2026 14:20:40 +0100
Subject: [PATCH] fix: use max_completion_tokens instead of max_tokens for
 GPT-4o

Azure OpenAI GPT-4o and newer models require 'max_completion_tokens'
instead of 'max_tokens'. Fixed in all 3 services:
- chat_service.py (2 call sites)
- azure_ai.py (validation service)
- tenants/router.py (test connection endpoint)

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 backend/app/domains/tenants/router.py | 2 +-
 backend/app/services/azure_ai.py      | 2 +-
 backend/app/services/chat_service.py  | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/backend/app/domains/tenants/router.py b/backend/app/domains/tenants/router.py
index 75f755e..2ae6daf 100644
--- a/backend/app/domains/tenants/router.py
+++ b/backend/app/domains/tenants/router.py
@@ -193,7 +193,7 @@ async def test_tenant_ai_config(
         client.chat.completions.create(
             model=deployment,
             messages=[{"role": "user", "content": "ping"}],
-            max_tokens=5,
+            max_completion_tokens=5,
         )
         return {"ok": True}
     except Exception as exc:
diff --git a/backend/app/services/azure_ai.py b/backend/app/services/azure_ai.py
index 657608a..7a9f4bd 100644
--- a/backend/app/services/azure_ai.py
+++ b/backend/app/services/azure_ai.py
@@ -124,7 +124,7 @@ def _call_azure_vision(
                 ],
             }
         ],
-        max_tokens=max_tokens,
+        max_completion_tokens=max_tokens,
         temperature=temperature,
     )
 
diff --git a/backend/app/services/chat_service.py b/backend/app/services/chat_service.py
index 42dde14..6aa33af 100644
--- a/backend/app/services/chat_service.py
+++ b/backend/app/services/chat_service.py
@@ -701,7 +701,7 @@ async def chat_with_agent(
             messages=messages,
             tools=TOOLS,
             tool_choice="auto",
-            max_tokens=creds["max_tokens"],
+            max_completion_tokens=creds["max_tokens"],
             temperature=creds["temperature"],
         )
         if response.usage:
@@ -744,7 +744,7 @@ async def chat_with_agent(
             messages=messages,
             tools=TOOLS,
             tool_choice="auto",
-            max_tokens=creds["max_tokens"],
+            max_completion_tokens=creds["max_tokens"],
             temperature=creds["temperature"],
         )
         if response.usage: