fix(azure/responses): always remove status

`status` on reasoning input items is an unsupported parameter for Azure, so it is always stripped from the request.
2026-07-01 20:44:04 -04:00 · 2025-10-06 18:08:57 -07:00
parent fbf7ca17ba
commit 5336fcc000
6 changed files with 113 additions and 52 deletions
@@ -1,6 +1,7 @@
 from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Tuple

 import httpx
+from openai.types.responses import ResponseReasoningItem

 from litellm._logging import verbose_logger
 from litellm.llms.azure.common_utils import BaseAzureLLM
@@ -38,6 +39,50 @@ class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig):
            model = model.replace("o_series/", "")
        return model

+    def _handle_reasoning_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Normalize a reasoning input item so it never carries a ``status`` key.
+
+        Issue: https://github.com/BerriAI/litellm/issues/13484
+
+        Non-reasoning items (``type`` other than ``"reasoning"``) are returned
+        unchanged. For reasoning items:
+        1. Copy the item and fill in ``id`` / ``summary`` when they are missing.
+        2. Round-trip it through ``ResponseReasoningItem`` and dump it with
+           ``exclude_none=True`` so None-valued fields are omitted.
+        3. Remove ``status`` unconditionally, even when it has a value.
+
+        If the round-trip raises, fall back to filtering the original dict:
+        ``status`` is still always removed, and ``content`` /
+        ``encrypted_content`` are removed only when they are None.
+
+        Args:
+            item: One entry of the Responses API ``input`` list.
+
+        Returns:
+            The original ``item`` for non-reasoning entries, otherwise a new
+            dict without a ``status`` key. The input dict is never mutated.
+        """
+        if item.get("type") != "reasoning":
+            return item
+
+        try:
+            # Ensure required fields are present for ResponseReasoningItem
+            item_data = dict(item)
+            if "id" not in item_data:
+                item_data["id"] = f"reasoning_{hash(str(item_data))}"
+            if "summary" not in item_data:
+                reasoning_content = item_data.get("reasoning_content", "")
+                item_data["summary"] = (
+                    reasoning_content[:100] + "..."
+                    if len(reasoning_content) > 100
+                    else reasoning_content
+                )
+
+            # Create ResponseReasoningItem object from the item data
+            reasoning_item = ResponseReasoningItem(**item_data)
+
+            # exclude_none=True omits None-valued fields; `status` is removed
+            # explicitly because it must go even when it holds a real value.
+            dict_reasoning_item = reasoning_item.model_dump(exclude_none=True)
+            dict_reasoning_item.pop("status", None)
+
+            return dict_reasoning_item
+        except Exception as e:
+            verbose_logger.debug(
+                f"Failed to create ResponseReasoningItem, falling back to manual filtering: {e}"
+            )
+            # Fallback: drop `status` regardless of its value (same as the
+            # main path) and drop the known optional fields only when None.
+            return {
+                k: v
+                for k, v in item.items()
+                if k != "status"
+                and (v is not None or k not in {"content", "encrypted_content"})
+            }
+
    def transform_responses_api_request(
        self,
        model: str,
@@ -48,12 +93,13 @@ class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig):
    ) -> Dict:
        """No transform applied since inputs are in OpenAI spec already"""
        stripped_model_name = self.get_stripped_model_name(model)
-        return dict(
-            ResponsesAPIRequestParams(
-                model=stripped_model_name,
-                input=input,
-                **response_api_optional_request_params,
-            )
+
+        return super().transform_responses_api_request(
+            model=stripped_model_name,
+            input=input,
+            response_api_optional_request_params=response_api_optional_request_params,
+            litellm_params=litellm_params,
+            headers=headers,
        )

    def get_complete_url(
@@ -217,15 +263,15 @@ class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig):
        at the correct location (before any query parameters).
        """
        from urllib.parse import urlparse, urlunparse
-        
+
        # Parse the URL to separate its components
        parsed_url = urlparse(api_base)
-        
+
        # Insert the response_id and /cancel at the end of the path component
        # Remove trailing slash if present to avoid double slashes
        path = parsed_url.path.rstrip("/")
        new_path = f"{path}/{response_id}/cancel"
-        
+
        # Reconstruct the URL with all original components but with the modified path
        cancel_url = urlunparse(
            (
@@ -1,12 +1,4 @@
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Dict,
-    Optional,
-    Union,
-    cast,
-    get_type_hints,
-)
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union, cast, get_type_hints

 import httpx
 from openai.types.responses import ResponseReasoningItem
@@ -127,7 +119,6 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
        2. Create a ResponseReasoningItem object with the item data
        3. Convert it back to dict with exclude_none=True to filter None values
        """
-        verbose_logger.debug(f"Handling reasoning item: {item}")
        if item.get("type") == "reasoning":
            try:
                # Ensure required fields are present for ResponseReasoningItem
@@ -1,33 +1,6 @@
 model_list:
-  - model_name: openai/gpt-4o
+  - model_name: gpt-5-mini
    litellm_params:
-      model: openai/gpt-4o-mini
-      api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
-      api_key: dummy
-  - model_name: "byok-wildcard/*"
-    litellm_params:
-      model: openai/*
-  - model_name: xai-grok-3
-    litellm_params:
-      model: xai/grok-3
-  - model_name: hosted_vllm/whisper-v3
-    litellm_params:
-      model: hosted_vllm/whisper-v3
-      api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
-      api_key: dummy
-
-# mcp_servers:
-#   github_mcp:
-#     url: "https://api.githubcopilot.com/mcp"
-#     auth_type: oauth2
-#     authorization_url: https://github.com/login/oauth/authorize
-#     token_url: https://github.com/login/oauth/access_token
-#     client_id: os.environ/GITHUB_OAUTH_CLIENT_ID
-#     client_secret: os.environ/GITHUB_OAUTH_CLIENT_SECRET
-#     scopes: ["public_repo", "user:email"]
-#     allowed_tools: ["list_tools"]
-#     # disallowed_tools: ["repo_delete"]
-
-litellm_settings:
-  callbacks: ["prometheus"]
-  custom_prometheus_metadata_labels: ["metadata.initiative", "metadata.business-unit"]
+      model: azure/gpt-5-mini-2
+      api_key: os.environ/AZURE_API_KEY_ALT
+      api_base: os.environ/AZURE_API_BASE_ALT
@@ -19,6 +19,7 @@ from litellm.types.llms.openai import (
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
 from base_responses_api import BaseResponsesAPITest

+
 class TestAzureResponsesAPITest(BaseResponsesAPITest):
    def get_base_completion_call_args(self):
        return {
@@ -43,4 +44,55 @@ async def test_azure_responses_api_preview_api_version():
        api_base=os.getenv("AZURE_RESPONSES_OPENAI_ENDPOINT"),
        api_key=os.getenv("AZURE_RESPONSES_OPENAI_API_KEY"),
        input="Hello, can you tell me a short joke?",
-    )
+    )
+
+
+@pytest.mark.asyncio
+async def test_azure_responses_api_status_error():
+    """
+    Regression test: a conversation history containing a reasoning item with a
+    ``status`` field must be accepted by the Azure Responses API call.
+
+    See https://github.com/BerriAI/litellm/issues/13484
+
+    Makes a live request; reads AZURE_GPT5_MINI_API_BASE and
+    AZURE_GPT5_MINI_API_KEY from the environment.
+    """
+    litellm._turn_on_debug()
+
+    # Only request_data["input"] is forwarded to aresponses() below; the
+    # other keys are not sent.
+    request_data = {
+        "model": "gpt-5-mini",
+        "input": [
+            {"content": "tell me an interesting fact", "role": "user"},
+            {
+                # Reasoning item with a non-None status: the case under test.
+                "id": "rs_0ab687487834d9df0068e462a1b2d88197aabbc832c9ba5316",
+                "summary": [],
+                "type": "reasoning",
+                "content": None,
+                "encrypted_content": None,
+                "status": "completed",
+            },
+            {
+                "id": "msg_0ab687487834d9df0068e462a1df188197b74b1eef05102c18",
+                "content": [
+                    {
+                        "annotations": [],
+                        "text": "Octopuses have three hearts: two pump blood to the gills, while the third pumps it to the rest of the body. Even more unusual, their blood is blue because it uses the copper-containing protein hemocyanin to carry oxygen, which is more efficient than hemoglobin in cold, low-oxygen environments.",
+                        "type": "output_text",
+                        "logprobs": [],
+                    }
+                ],
+                "role": "assistant",
+                "status": "completed",
+                "type": "message",
+            },
+            {"role": "user", "content": "tell me another"},
+        ],
+        "include": [],
+        "instructions": "You are a helpful assistant.",
+        "reasoning": {"effort": "minimal"},
+        "stream": False,
+        "tools": [],
+    }
+    response = await litellm.aresponses(
+        model="azure/gpt-5-mini-2",
+        truncation="auto",
+        api_version="preview",
+        api_base=os.getenv("AZURE_GPT5_MINI_API_BASE"),
+        api_key=os.getenv("AZURE_GPT5_MINI_API_KEY"),
+        input=request_data["input"],
+    )
+
+    # Reaching this point means the request was not rejected; make the
+    # success condition explicit instead of leaving `response` unused.
+    assert response is not None