From 8b234ae57cabfb88ef7556c1099acf005bab831b Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Thu, 27 Feb 2025 02:28:01 +0100
Subject: [PATCH] Azure completion_tokens fix (take two) (#6975)

---
 openhands/llm/llm.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index f6b2954565..8398bb5849 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -137,6 +137,7 @@ class LLM(RetryMixin, DebugMixin):
         # set up the completion function
         kwargs: dict[str, Any] = {
             'temperature': self.config.temperature,
+            'max_completion_tokens': self.config.max_output_tokens,
         }
         if (
             self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS
@@ -146,6 +147,10 @@ class LLM(RetryMixin, DebugMixin):
             kwargs.pop(
                 'temperature'
             )  # temperature is not supported for reasoning models
+        # Azure workaround: send 'max_tokens' instead of 'max_completion_tokens'; see https://github.com/All-Hands-AI/OpenHands/issues/6777
+        if self.config.model.startswith('azure'):
+            kwargs['max_tokens'] = self.config.max_output_tokens
+            kwargs.pop('max_completion_tokens')
 
         self._completion = partial(
             litellm_completion,
@@ -156,7 +161,6 @@ class LLM(RetryMixin, DebugMixin):
             base_url=self.config.base_url,
             api_version=self.config.api_version,
             custom_llm_provider=self.config.custom_llm_provider,
-            max_completion_tokens=self.config.max_output_tokens,
             timeout=self.config.timeout,
             top_p=self.config.top_p,
             drop_params=self.config.drop_params,