fix(azure/responses): always remove status

unsupported parameter
This commit is contained in:
Krrish Dholakia
2025-10-06 18:08:57 -07:00
parent fbf7ca17ba
commit 5336fcc000
6 changed files with 113 additions and 52 deletions
+55 -9
View File
@@ -1,6 +1,7 @@
from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Tuple
import httpx
from openai.types.responses import ResponseReasoningItem
from litellm._logging import verbose_logger
from litellm.llms.azure.common_utils import BaseAzureLLM
@@ -38,6 +39,50 @@ class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig):
model = model.replace("o_series/", "")
return model
def _handle_reasoning_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
    """
    Sanitize a ``reasoning`` input item before it is sent upstream.

    Issue: https://github.com/BerriAI/litellm/issues/13484

    Items whose ``type`` is not ``"reasoning"`` are returned untouched (the
    very same object). Reasoning items are processed as follows:

    1. Copy the item and fill in ``id`` / ``summary`` when they are missing.
    2. Build a ``ResponseReasoningItem`` from the copy and dump it back to a
       dict with ``exclude_none=True`` so None-valued fields disappear.
    3. Remove ``status`` unconditionally.

    If step 2 raises for any reason, a manual filter is applied to the
    original item instead: ``status`` is removed regardless of its value, and
    ``content`` / ``encrypted_content`` are removed when they are None.

    Args:
        item: One entry of the Responses API ``input`` list.

    Returns:
        The original ``item`` for non-reasoning entries, otherwise a new dict
        that never contains a ``status`` key.
    """
    if item.get("type") != "reasoning":
        return item

    try:
        # Ensure required fields are present for ResponseReasoningItem
        item_data = dict(item)
        if "id" not in item_data:
            # NOTE: str hashing is salted per process, so this synthetic id
            # is not stable across interpreter runs.
            item_data["id"] = f"reasoning_{hash(str(item_data))}"
        if "summary" not in item_data:
            reasoning_content = item_data.get("reasoning_content", "")
            item_data["summary"] = (
                reasoning_content[:100] + "..."
                if len(reasoning_content) > 100
                else reasoning_content
            )

        # Round-trip through the OpenAI model to drop None-valued fields
        reasoning_item = ResponseReasoningItem(**item_data)
        dict_reasoning_item = reasoning_item.model_dump(exclude_none=True)
        dict_reasoning_item.pop("status", None)
        return dict_reasoning_item
    except Exception as e:
        verbose_logger.debug(
            f"Failed to create ResponseReasoningItem, falling back to manual filtering: {e}"
        )
        # Fallback: mirror the happy path. `status` must go even when it has
        # a value; the other known-optional fields are only dropped when None.
        return {
            k: v
            for k, v in item.items()
            if k != "status"
            and (v is not None or k not in {"content", "encrypted_content"})
        }
def transform_responses_api_request(
self,
model: str,
@@ -48,12 +93,13 @@ class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig):
) -> Dict:
"""Strip the model name, then delegate to the parent class's request transform."""
stripped_model_name = self.get_stripped_model_name(model)
return dict(
ResponsesAPIRequestParams(
model=stripped_model_name,
input=input,
**response_api_optional_request_params,
)
return super().transform_responses_api_request(
model=stripped_model_name,
input=input,
response_api_optional_request_params=response_api_optional_request_params,
litellm_params=litellm_params,
headers=headers,
)
def get_complete_url(
@@ -217,15 +263,15 @@ class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig):
at the correct location (before any query parameters).
"""
from urllib.parse import urlparse, urlunparse
# Parse the URL to separate its components
parsed_url = urlparse(api_base)
# Insert the response_id and /cancel at the end of the path component
# Remove trailing slash if present to avoid double slashes
path = parsed_url.path.rstrip("/")
new_path = f"{path}/{response_id}/cancel"
# Reconstruct the URL with all original components but with the modified path
cancel_url = urlunparse(
(
@@ -1,12 +1,4 @@
from typing import (
TYPE_CHECKING,
Any,
Dict,
Optional,
Union,
cast,
get_type_hints,
)
from typing import TYPE_CHECKING, Any, Dict, Optional, Union, cast, get_type_hints
import httpx
from openai.types.responses import ResponseReasoningItem
@@ -127,7 +119,6 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
2. Create a ResponseReasoningItem object with the item data
3. Convert it back to dict with exclude_none=True to filter None values
"""
verbose_logger.debug(f"Handling reasoning item: {item}")
if item.get("type") == "reasoning":
try:
# Ensure required fields are present for ResponseReasoningItem
File diff suppressed because one or more lines are too long
+4 -31
View File
@@ -1,33 +1,6 @@
model_list:
- model_name: openai/gpt-4o
- model_name: gpt-5-mini
litellm_params:
model: openai/gpt-4o-mini
api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
api_key: dummy
- model_name: "byok-wildcard/*"
litellm_params:
model: openai/*
- model_name: xai-grok-3
litellm_params:
model: xai/grok-3
- model_name: hosted_vllm/whisper-v3
litellm_params:
model: hosted_vllm/whisper-v3
api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
api_key: dummy
# mcp_servers:
# github_mcp:
# url: "https://api.githubcopilot.com/mcp"
# auth_type: oauth2
# authorization_url: https://github.com/login/oauth/authorize
# token_url: https://github.com/login/oauth/access_token
# client_id: os.environ/GITHUB_OAUTH_CLIENT_ID
# client_secret: os.environ/GITHUB_OAUTH_CLIENT_SECRET
# scopes: ["public_repo", "user:email"]
# allowed_tools: ["list_tools"]
# # disallowed_tools: ["repo_delete"]
litellm_settings:
callbacks: ["prometheus"]
custom_prometheus_metadata_labels: ["metadata.initiative", "metadata.business-unit"]
model: azure/gpt-5-mini-2
api_key: os.environ/AZURE_API_KEY_ALT
api_base: os.environ/AZURE_API_BASE_ALT
@@ -19,6 +19,7 @@ from litellm.types.llms.openai import (
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from base_responses_api import BaseResponsesAPITest
class TestAzureResponsesAPITest(BaseResponsesAPITest):
def get_base_completion_call_args(self):
return {
@@ -43,4 +44,55 @@ async def test_azure_responses_api_preview_api_version():
api_base=os.getenv("AZURE_RESPONSES_OPENAI_ENDPOINT"),
api_key=os.getenv("AZURE_RESPONSES_OPENAI_API_KEY"),
input="Hello, can you tell me a short joke?",
)
)
@pytest.mark.asyncio
async def test_azure_responses_api_status_error():
    """
    Replay a multi-turn conversation through the Azure Responses API where
    the history contains a reasoning item and an assistant message that both
    carry a ``status`` field.

    This is a live call with no assertions in the visible body: the test
    passes as long as ``litellm.aresponses`` does not raise.
    """
    litellm._turn_on_debug()

    opening_user_turn = {"content": "tell me an interesting fact", "role": "user"}
    # Reasoning item echoed back from a previous response, including `status`.
    prior_reasoning = {
        "id": "rs_0ab687487834d9df0068e462a1b2d88197aabbc832c9ba5316",
        "summary": [],
        "type": "reasoning",
        "content": None,
        "encrypted_content": None,
        "status": "completed",
    }
    prior_answer = {
        "id": "msg_0ab687487834d9df0068e462a1df188197b74b1eef05102c18",
        "content": [
            {
                "annotations": [],
                "text": "Octopuses have three hearts: two pump blood to the gills, while the third pumps it to the rest of the body. Even more unusual, their blood is blue because it uses the copper-containing protein hemocyanin to carry oxygen, which is more efficient than hemoglobin in cold, low-oxygen environments.",
                "type": "output_text",
                "logprobs": [],
            }
        ],
        "role": "assistant",
        "status": "completed",
        "type": "message",
    }
    follow_up_user_turn = {"role": "user", "content": "tell me another"}

    # Only the "input" entry is forwarded below; the other keys are kept as
    # a record of the full request shape.
    request_data = {
        "model": "gpt-5-mini",
        "input": [
            opening_user_turn,
            prior_reasoning,
            prior_answer,
            follow_up_user_turn,
        ],
        "include": [],
        "instructions": "You are a helpful assistant.",
        "reasoning": {"effort": "minimal"},
        "stream": False,
        "tools": [],
    }

    response = await litellm.aresponses(
        model="azure/gpt-5-mini-2",
        truncation="auto",
        api_version="preview",
        api_base=os.getenv("AZURE_GPT5_MINI_API_BASE"),
        api_key=os.getenv("AZURE_GPT5_MINI_API_KEY"),
        input=request_data["input"],
    )