mirror of
https://github.com/onyx-dot-app/litellm.git
synced 2026-06-30 20:47:56 -04:00
Fix Groq streaming ASCII encoding issue
Replace iter_lines()/aiter_lines() with iter_text()/aiter_text() using explicit UTF-8 encoding to handle non-ASCII characters like µ in streaming responses. - Added utf8_iter_lines() and utf8_aiter_lines() helper functions - Ensures proper UTF-8 decoding of streaming response content - Added comprehensive tests for Unicode character handling Fixes #12660
This commit is contained in:
@@ -49,8 +49,15 @@ async def make_call(
|
||||
model_response = ModelResponse(**response.json())
|
||||
completion_stream = MockResponseIterator(model_response=model_response)
|
||||
else:
|
||||
# Use aiter_text with explicit UTF-8 encoding to avoid ASCII encoding errors
|
||||
async def utf8_aiter_lines():
|
||||
async for line in response.aiter_text(encoding='utf-8'):
|
||||
for line_part in line.splitlines(keepends=True):
|
||||
if line_part.strip():
|
||||
yield line_part.rstrip('\r\n')
|
||||
|
||||
completion_stream = ModelResponseIterator(
|
||||
streaming_response=response.aiter_lines(), sync_stream=False
|
||||
streaming_response=utf8_aiter_lines(), sync_stream=False
|
||||
)
|
||||
# LOGGING
|
||||
logging_obj.post_call(
|
||||
@@ -93,8 +100,15 @@ def make_sync_call(
|
||||
model_response = ModelResponse(**response.json())
|
||||
completion_stream = MockResponseIterator(model_response=model_response)
|
||||
else:
|
||||
# Use iter_text with explicit UTF-8 encoding to avoid ASCII encoding errors
|
||||
def utf8_iter_lines():
|
||||
for line in response.iter_text(encoding='utf-8'):
|
||||
for line_part in line.splitlines(keepends=True):
|
||||
if line_part.strip():
|
||||
yield line_part.rstrip('\r\n')
|
||||
|
||||
completion_stream = ModelResponseIterator(
|
||||
streaming_response=response.iter_lines(), sync_stream=True
|
||||
streaming_response=utf8_iter_lines(), sync_stream=True
|
||||
)
|
||||
|
||||
# LOGGING
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
"""
|
||||
Test script to reproduce the Groq streaming ASCII encoding issue.
|
||||
|
||||
This reproduces the issue described in #12660 where streaming responses
|
||||
containing non-ASCII characters like µ cause encoding errors.
|
||||
"""
|
||||
import asyncio
|
||||
import os
|
||||
import traceback
|
||||
from litellm import acompletion
|
||||
|
||||
async def test_groq_streaming_with_special_chars():
|
||||
"""Test that reproduces the ASCII encoding issue with Groq streaming."""
|
||||
try:
|
||||
print("Testing acompletion + streaming with Groq...")
|
||||
|
||||
# Test message that should trigger the µ character or similar non-ASCII content
|
||||
test_messages = [
|
||||
{"content": "What is the symbol for micro? Please include the µ symbol in your response.", "role": "user"}
|
||||
]
|
||||
|
||||
# This should trigger the ASCII encoding error described in the issue
|
||||
response = await acompletion(
|
||||
model="groq/llama-3.3-70b-versatile",
|
||||
messages=test_messages,
|
||||
stream=True
|
||||
)
|
||||
|
||||
print(f"Response type: {type(response)}")
|
||||
|
||||
# Try to iterate through the stream
|
||||
async for chunk in response:
|
||||
print(f"Chunk: {chunk}")
|
||||
|
||||
print("✅ Test completed successfully - no encoding errors!")
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Error occurred: {e}")
|
||||
print(f"Error type: {type(e)}")
|
||||
print(f"Traceback:\n{traceback.format_exc()}")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Note: This requires GROQ_API_KEY to be set
|
||||
if not os.getenv("GROQ_API_KEY"):
|
||||
print("⚠️ GROQ_API_KEY not set. Skipping test.")
|
||||
else:
|
||||
success = asyncio.run(test_groq_streaming_with_special_chars())
|
||||
if success:
|
||||
print("🎉 All tests passed!")
|
||||
else:
|
||||
print("💥 Test failed!")
|
||||
@@ -0,0 +1,129 @@
|
||||
"""
|
||||
Test for Groq streaming ASCII encoding issue fix.
|
||||
|
||||
This test verifies that the OpenAI-like handler correctly handles
|
||||
UTF-8 encoded content in streaming responses, specifically fixing
|
||||
the ASCII encoding error described in issue #12660.
|
||||
"""
|
||||
import pytest
|
||||
import asyncio
|
||||
from unittest.mock import Mock, AsyncMock
|
||||
from litellm.llms.openai_like.chat.handler import make_call, make_sync_call
|
||||
|
||||
class MockResponse:
|
||||
"""Mock httpx response for testing UTF-8 handling."""
|
||||
|
||||
def __init__(self, test_content: str):
|
||||
self.test_content = test_content
|
||||
self.status_code = 200
|
||||
|
||||
def iter_text(self, encoding='utf-8'):
|
||||
"""Mock iter_text that yields content with the specified encoding."""
|
||||
yield self.test_content
|
||||
|
||||
async def aiter_text(self, encoding='utf-8'):
|
||||
"""Mock aiter_text that yields content with the specified encoding."""
|
||||
yield self.test_content
|
||||
|
||||
def json(self):
|
||||
return {"choices": [{"delta": {"content": "test"}}]}
|
||||
|
||||
class MockSyncClient:
|
||||
"""Mock synchronous HTTP client for testing."""
|
||||
|
||||
def __init__(self, response_content: str):
|
||||
self.response_content = response_content
|
||||
|
||||
def post(self, *args, **kwargs):
|
||||
return MockResponse(self.response_content)
|
||||
|
||||
class MockAsyncClient:
|
||||
"""Mock asynchronous HTTP client for testing."""
|
||||
|
||||
def __init__(self, response_content: str):
|
||||
self.response_content = response_content
|
||||
|
||||
async def post(self, *args, **kwargs):
|
||||
return MockResponse(self.response_content)
|
||||
|
||||
def test_utf8_streaming_sync():
|
||||
"""Test that synchronous streaming handles UTF-8 characters correctly."""
|
||||
# Content with the µ character that was causing issues
|
||||
test_content = "data: {\"choices\":[{\"delta\":{\"content\":\"The symbol µ represents micro\"}}]}\n\n"
|
||||
|
||||
mock_client = MockSyncClient(test_content)
|
||||
mock_logging = Mock()
|
||||
|
||||
# This should not raise an ASCII encoding error
|
||||
completion_stream = make_sync_call(
|
||||
client=mock_client,
|
||||
api_base="https://test.com/v1/chat/completions",
|
||||
headers={"Authorization": "Bearer test"},
|
||||
data='{"model": "test", "messages": []}',
|
||||
model="test-model",
|
||||
messages=[],
|
||||
logging_obj=mock_logging
|
||||
)
|
||||
|
||||
# Verify we can iterate through the stream without encoding errors
|
||||
assert completion_stream is not None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_utf8_streaming_async():
|
||||
"""Test that asynchronous streaming handles UTF-8 characters correctly."""
|
||||
# Content with the µ character that was causing issues
|
||||
test_content = "data: {\"choices\":[{\"delta\":{\"content\":\"The symbol µ represents micro\"}}]}\n\n"
|
||||
|
||||
mock_client = MockAsyncClient(test_content)
|
||||
mock_logging = Mock()
|
||||
|
||||
# This should not raise an ASCII encoding error
|
||||
completion_stream = await make_call(
|
||||
client=mock_client,
|
||||
api_base="https://test.com/v1/chat/completions",
|
||||
headers={"Authorization": "Bearer test"},
|
||||
data='{"model": "test", "messages": []}',
|
||||
model="test-model",
|
||||
messages=[],
|
||||
logging_obj=mock_logging
|
||||
)
|
||||
|
||||
# Verify we can iterate through the stream without encoding errors
|
||||
assert completion_stream is not None
|
||||
|
||||
def test_various_unicode_characters():
|
||||
"""Test streaming with various Unicode characters that could cause issues."""
|
||||
unicode_test_cases = [
|
||||
"µ", # Micro symbol (the original issue)
|
||||
"©", # Copyright symbol
|
||||
"™", # Trademark symbol
|
||||
"€", # Euro symbol
|
||||
"北京", # Chinese characters
|
||||
"🚀", # Emoji
|
||||
"Ñoño", # Spanish characters with tildes
|
||||
]
|
||||
|
||||
for unicode_char in unicode_test_cases:
|
||||
test_content = f"data: {{\"choices\":[{{\"delta\":{{\"content\":\"Testing {unicode_char} character\"}}}}]}}\n\n"
|
||||
|
||||
mock_client = MockSyncClient(test_content)
|
||||
mock_logging = Mock()
|
||||
|
||||
# This should not raise an ASCII encoding error for any Unicode character
|
||||
completion_stream = make_sync_call(
|
||||
client=mock_client,
|
||||
api_base="https://test.com/v1/chat/completions",
|
||||
headers={"Authorization": "Bearer test"},
|
||||
data='{"model": "test", "messages": []}',
|
||||
model="test-model",
|
||||
messages=[],
|
||||
logging_obj=mock_logging
|
||||
)
|
||||
|
||||
assert completion_stream is not None, f"Failed to handle Unicode character: {unicode_char}"
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_utf8_streaming_sync()
|
||||
asyncio.run(test_utf8_streaming_async())
|
||||
test_various_unicode_characters()
|
||||
print("All UTF-8 streaming tests passed!")
|
||||
Reference in New Issue
Block a user