forked from stitionai/devika
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: Implement rate limit handling with 60s wait
- Add 60-second wait on HTTP 429 responses
- Properly handle Groq API rate limit errors
- Add comprehensive test coverage
- Improve error messaging and logging

Fixes stitionai#524

Co-Authored-By: Erkin Alp Güney <[email protected]>
- Loading branch information
1 parent
9ec4699
commit 73c14f2
Showing
4 changed files
with
117 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import pytest | ||
from unittest.mock import Mock, patch | ||
from requests.exceptions import HTTPError | ||
|
||
from src.llm.groq_client import Groq | ||
|
||
|
||
def test_groq_rate_limit_handling():
    """Expect a rate-limit failure from the client to surface as an HTTPError.

    The underlying client is replaced by a mock whose
    ``chat.completions.create`` raises a generic ``Exception`` carrying a
    rate-limit message. ``Groq.inference`` is then expected to raise an
    ``HTTPError`` whose response has status 429 and mentions the rate limit.
    """
    rate_limit_error = Exception(
        'Rate limit reached for model `mixtral-8x7b-32768`. Please try again in 7.164s.'
    )

    # Stand-in for the real client: every completion request fails.
    fake_client = Mock()
    fake_client.chat.completions.create.side_effect = rate_limit_error

    groq = Groq()
    groq.client = fake_client

    with pytest.raises(HTTPError) as raised:
        groq.inference("mixtral-8x7b-32768", "test prompt")

    response = raised.value.response
    assert response.status_code == 429

    body_text = str(response.content.decode())
    assert "rate limit" in body_text.lower()
|
||
|
||
def test_groq_other_error_handling():
    """Expect errors unrelated to rate limiting to propagate unchanged.

    The mocked client raises a plain ``Exception``; the test checks that the
    exception reaching the caller keeps its message and is not an
    ``HTTPError``.
    """
    # Stand-in for the real client: every completion request fails with a
    # message that has nothing to do with rate limiting.
    fake_client = Mock()
    fake_client.chat.completions.create.side_effect = Exception("Some other error")

    groq = Groq()
    groq.client = fake_client

    with pytest.raises(Exception) as raised:
        groq.inference("mixtral-8x7b-32768", "test prompt")

    error = raised.value
    assert "Some other error" in str(error)
    assert not isinstance(error, HTTPError)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import pytest | ||
import time | ||
import requests | ||
from unittest.mock import Mock, patch | ||
from src.services.utils import retry_wrapper | ||
|
||
def test_retry_wrapper_rate_limit():
    """Expect ``retry_wrapper`` to sleep for 60 seconds on an HTTP 429 error.

    The decorated function always raises ``requests.exceptions.HTTPError``
    with a mocked response whose status code is 429. ``time.sleep`` is
    patched so the test never actually waits.

    NOTE(review): patching ``'time.sleep'`` only intercepts the call if
    ``retry_wrapper`` looks ``sleep`` up through the ``time`` module at call
    time -- confirm against ``src.services.utils``.
    """
    @retry_wrapper
    def rate_limited_func():
        # Mocked 429 response whose JSON body describes the rate-limit error.
        response = Mock(spec=requests.Response)
        response.status_code = 429
        response.json.return_value = {
            'error': {
                'message': 'Rate limit reached',
                'type': 'tokens',
                'code': 'rate_limit_exceeded'
            }
        }
        raise requests.exceptions.HTTPError(response=response)

    with patch('time.sleep') as mock_sleep:
        with pytest.raises(requests.exceptions.HTTPError):
            rate_limited_func()

        # These checks sit outside the ``pytest.raises`` block: anything
        # placed after the raising call inside that block would never run.
        #
        # ``call_args`` is None when the mock was never called, so guard
        # first to get a readable failure instead of a TypeError from
        # subscripting None.
        assert mock_sleep.called, "time.sleep was never called on HTTP 429"
        # Verify the most recent sleep request was for 60 seconds.
        assert mock_sleep.call_args[0][0] == 60
|
||
def test_retry_wrapper_other_errors():
    """Expect non-429 HTTP errors to never trigger a 60-second (or longer) sleep.

    The decorated function always raises ``requests.exceptions.HTTPError``
    with a mocked 500 response. ``time.sleep`` is patched, and every recorded
    sleep duration must be below 60 seconds. The check passes trivially when
    no sleep was recorded at all.
    """
    @retry_wrapper
    def other_error_func():
        # Mocked response reporting a server-side failure.
        failing_response = Mock(spec=requests.Response)
        failing_response.status_code = 500
        raise requests.exceptions.HTTPError(response=failing_response)

    http_error = requests.exceptions.HTTPError
    with patch('time.sleep') as mock_sleep, pytest.raises(http_error):
        other_error_func()

    # The mock keeps its call history after the patch is undone; the first
    # positional argument of each recorded call is the requested delay.
    assert all(
        recorded[0][0] < 60 for recorded in mock_sleep.call_args_list
    )