diff --git a/python/pyproject.toml b/python/pyproject.toml
index 3dd580cdf470..bf30da7ef96e 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -86,7 +86,7 @@ ollama = [
     "ollama ~= 0.4"
 ]
 onnx = [
-    "onnxruntime-genai ~= 0.4; platform_system != 'Darwin'"
+    "onnxruntime-genai ~= 0.5"
 ]
 anthropic = [
     "anthropic ~= 0.32"
@@ -156,7 +156,8 @@ filterwarnings = [
 ]
 timeout = 120
 markers = [
-    "ollama: mark a test as requiring the Ollama service (use \"not ollama\" to skip those tests)"
+    "ollama: mark a test as requiring the Ollama service (use \"not ollama\" to skip those tests)",
+    "onnx: mark a test as requiring the Onnx service (use \"not onnx\" to skip those tests)"
 ]
 
 [tool.ruff]
diff --git a/python/tests/integration/completions/chat_completion_test_base.py b/python/tests/integration/completions/chat_completion_test_base.py
index d05157e607c5..7a4db5b8fcfe 100644
--- a/python/tests/integration/completions/chat_completion_test_base.py
+++ b/python/tests/integration/completions/chat_completion_test_base.py
@@ -2,7 +2,6 @@
 
 
 import os
-import platform
 import sys
 from typing import Annotated
 
@@ -22,6 +21,7 @@
 from semantic_kernel.connectors.ai.google.vertex_ai import VertexAIChatCompletion, VertexAIChatPromptExecutionSettings
 from semantic_kernel.connectors.ai.mistral_ai import MistralAIChatCompletion, MistralAIChatPromptExecutionSettings
 from semantic_kernel.connectors.ai.ollama import OllamaChatCompletion, OllamaChatPromptExecutionSettings
+from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings, ONNXTemplate
 from semantic_kernel.connectors.ai.open_ai import (
     AzureChatCompletion,
     AzureChatPromptExecutionSettings,
@@ -71,12 +71,6 @@
 
 bedrock_setup: bool = is_service_setup_for_testing(["AWS_DEFAULT_REGION"], raise_if_not_set=False)
 
-skip_on_mac_available = platform.system() == "Darwin"
-if not skip_on_mac_available:
-    from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings
-    from semantic_kernel.connectors.ai.onnx.utils import ONNXTemplate
-
-
 # A mock plugin that contains a function that returns a complex object.
 class PersonDetails(KernelBaseModel):
     id: str
@@ -155,7 +149,7 @@ def services(self) -> dict[str, tuple[ServiceType | None, type[PromptExecutionSe
             "vertex_ai": (VertexAIChatCompletion() if vertex_ai_setup else None, VertexAIChatPromptExecutionSettings),
             "onnx_gen_ai": (
                 OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3V) if onnx_setup else None,
-                OnnxGenAIPromptExecutionSettings if not skip_on_mac_available else None,
+                OnnxGenAIPromptExecutionSettings,
             ),
             "bedrock_amazon_titan": (
                 BedrockChatCompletion(model_id="amazon.titan-text-premier-v1:0") if bedrock_setup else None,
diff --git a/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py b/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py
index f45f3367268c..2eb52e22f1e5 100644
--- a/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py
+++ b/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py
@@ -118,7 +118,10 @@
                 ChatMessageContent(role=AuthorRole.USER, items=[TextContent(text="Where was it made?")]),
             ],
             {},
-            marks=pytest.mark.skipif(not onnx_setup, reason="Need a Onnx Model setup"),
+            marks=(
+                pytest.mark.skipif(not onnx_setup, reason="Need a Onnx Model setup"),
+                pytest.mark.onnx,
+            ),
             id="onnx_gen_ai_image_input_file",
         ),
         pytest.param(
diff --git a/python/tests/integration/completions/test_chat_completions.py b/python/tests/integration/completions/test_chat_completions.py
index e3a77542f0f6..67eab08b0a90 100644
--- a/python/tests/integration/completions/test_chat_completions.py
+++ b/python/tests/integration/completions/test_chat_completions.py
@@ -150,7 +150,10 @@ class Reasoning(KernelBaseModel):
                 ChatMessageContent(role=AuthorRole.USER, items=[TextContent(text="How are you today?")]),
             ],
             {},
-            marks=pytest.mark.skipif(not onnx_setup, reason="Need a Onnx Model setup"),
+            marks=(
+                pytest.mark.skipif(not onnx_setup, reason="Need a Onnx Model setup"),
+                pytest.mark.onnx,
+            ),
             id="onnx_gen_ai",
         ),
         # endregion
diff --git a/python/tests/integration/completions/test_text_completion.py b/python/tests/integration/completions/test_text_completion.py
index 3e9b34ef76aa..7e6792de4465 100644
--- a/python/tests/integration/completions/test_text_completion.py
+++ b/python/tests/integration/completions/test_text_completion.py
@@ -1,6 +1,5 @@
 # Copyright (c) Microsoft. All rights reserved.
 
-import platform
 import sys
 from functools import partial
 from typing import Any
@@ -19,6 +18,7 @@
 from semantic_kernel.connectors.ai.google.vertex_ai import VertexAITextCompletion, VertexAITextPromptExecutionSettings
 from semantic_kernel.connectors.ai.hugging_face import HuggingFacePromptExecutionSettings, HuggingFaceTextCompletion
 from semantic_kernel.connectors.ai.ollama import OllamaTextCompletion, OllamaTextPromptExecutionSettings
+from semantic_kernel.connectors.ai.onnx import OnnxGenAIPromptExecutionSettings, OnnxGenAITextCompletion
 from semantic_kernel.connectors.ai.open_ai import (
     AzureOpenAISettings,
     AzureTextCompletion,
@@ -43,11 +43,6 @@
 )  # Tests are optional for ONNX
 bedrock_setup = is_service_setup_for_testing(["AWS_DEFAULT_REGION"], raise_if_not_set=False)
 
-skip_on_mac_available = platform.system() == "Darwin"
-if not skip_on_mac_available:
-    from semantic_kernel.connectors.ai.onnx import OnnxGenAIPromptExecutionSettings, OnnxGenAITextCompletion
-
-
 pytestmark = pytest.mark.parametrize(
     "service_id, execution_settings_kwargs, inputs, kwargs",
     [
@@ -128,7 +123,10 @@
             {},
             ["<|user|>Repeat the word Hello<|end|><|assistant|>"],
             {},
-            marks=pytest.mark.skipif(not onnx_setup, reason="Need local Onnx setup"),
+            marks=(
+                pytest.mark.skipif(not onnx_setup, reason="Need a Onnx Model setup"),
+                pytest.mark.onnx,
+            ),
             id="onnx_gen_ai_text_completion",
         ),
         pytest.param(
@@ -242,7 +240,7 @@ def services(self) -> dict[str, tuple[ServiceType | None, type[PromptExecutionSe
             ),
             "onnx_gen_ai": (
                 OnnxGenAITextCompletion() if onnx_setup else None,
-                OnnxGenAIPromptExecutionSettings if not skip_on_mac_available else None,
+                OnnxGenAIPromptExecutionSettings,
             ),
             # Amazon Bedrock supports models from multiple providers but requests to and responses from the models are
             # inconsistent. So we need to test each model separately.
diff --git a/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py b/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py
index 5454f4d52504..30c9573fef6c 100644
--- a/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py
+++ b/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py
@@ -1,185 +1,186 @@
 # Copyright (c) Microsoft. All rights reserved.
import json import os -import platform from unittest.mock import MagicMock, mock_open, patch import pytest +from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings, ONNXTemplate from semantic_kernel.contents import AuthorRole, ChatHistory, ChatMessageContent, ImageContent from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidExecutionSettingsError from semantic_kernel.kernel import Kernel from tests.unit.connectors.ai.onnx.conftest import gen_ai_config, gen_ai_config_vision -skip_on_mac_available = platform.system() == "Darwin" -if not skip_on_mac_available: - from semantic_kernel.connectors.ai.onnx import ( # noqa: E402 - OnnxGenAIChatCompletion, - OnnxGenAIPromptExecutionSettings, - ) - from semantic_kernel.connectors.ai.onnx.utils import ONNXTemplate # noqa: E402 - - -@pytest.mark.skipif(skip_on_mac_available, reason="OnnxRuntime is not available on macOS") -class TestOnnxChatCompletion: - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_valid_env_variable(self, gen_ai_config, model, tokenizer, onnx_unit_test_env): - service = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") - assert not service.enable_multi_modality - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config_vision)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_vision_valid_env_variable( - self, gen_ai_vision_config, model, tokenizer, onnx_unit_test_env - ): - service = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") - assert service.enable_multi_modality - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_valid_parameter(self, gen_ai_config, model, tokenizer): - assert OnnxGenAIChatCompletion(ai_model_path="/valid_path", template=ONNXTemplate.PHI3) - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_str_template(self, gen_ai_config, model, tokenizer): - assert OnnxGenAIChatCompletion(ai_model_path="/valid_path", template="phi3") - - def test_onnx_chat_completion_with_invalid_model(self): - with pytest.raises(ServiceInitializationError): - OnnxGenAIChatCompletion( - ai_model_path="/invalid_path", - template=ONNXTemplate.PHI3, - ) - - def test_onnx_chat_completion_without_prompt_template(self): - with pytest.raises(TypeError): - OnnxGenAIChatCompletion() - - def test_onnx_chat_completion_with_invalid_env_variable(self, onnx_unit_test_env): - with pytest.raises(ServiceInitializationError): - OnnxGenAIChatCompletion( - template=ONNXTemplate.PHI3, - ) - - @pytest.mark.parametrize("exclude_list", [["ONNX_GEN_AI_CHAT_MODEL_FOLDER"]], indirect=True) - def test_onnx_chat_completion_with_missing_ai_path(self, onnx_unit_test_env): - with pytest.raises(ServiceInitializationError): - OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - async def test_onnx_chat_completion(self, 
gen_ai_config, model, tokenizer): - generator_mock = MagicMock() - generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] - - chat_completion = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, ai_model_path="test") - history = ChatHistory() - history.add_system_message("test") - history.add_user_message("test") +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_valid_env_variable(gen_ai_config, model, tokenizer, onnx_unit_test_env): + service = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") + assert not service.enable_multi_modality + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config_vision)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_vision_valid_env_variable( + gen_ai_vision_config, model, tokenizer, onnx_unit_test_env +): + service = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") + assert service.enable_multi_modality + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_valid_parameter(gen_ai_config, model, tokenizer): + assert OnnxGenAIChatCompletion(ai_model_path="/valid_path", template=ONNXTemplate.PHI3) + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_str_template(gen_ai_config, model, tokenizer): + assert OnnxGenAIChatCompletion(ai_model_path="/valid_path", template="phi3") + + +def test_onnx_chat_completion_with_invalid_model(): + with pytest.raises(ServiceInitializationError): + OnnxGenAIChatCompletion( + ai_model_path="/invalid_path", + template=ONNXTemplate.PHI3, + ) + + +def test_onnx_chat_completion_without_prompt_template(): + with pytest.raises(TypeError): + OnnxGenAIChatCompletion() + + +def test_onnx_chat_completion_with_invalid_env_variable(onnx_unit_test_env): + with pytest.raises(ServiceInitializationError): + OnnxGenAIChatCompletion( + template=ONNXTemplate.PHI3, + ) + + +@pytest.mark.parametrize("exclude_list", [["ONNX_GEN_AI_CHAT_MODEL_FOLDER"]], indirect=True) +def test_onnx_chat_completion_with_missing_ai_path(onnx_unit_test_env): + with pytest.raises(ServiceInitializationError): + OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") - with patch.object(chat_completion, "_generate_next_token_async", return_value=generator_mock): - completed_text: ChatMessageContent = await chat_completion.get_chat_message_content( - prompt="test", chat_history=history, settings=OnnxGenAIPromptExecutionSettings(), kernel=Kernel() - ) - assert str(completed_text) == "Hello" +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +async def test_onnx_chat_completion(gen_ai_config, model, tokenizer): + generator_mock = MagicMock() + generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - async def test_onnx_chat_completion_streaming(self, gen_ai_config, 
model, tokenizer): - generator_mock = MagicMock() - generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] + chat_completion = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, ai_model_path="test") - chat_completion = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, ai_model_path="test") + history = ChatHistory() + history.add_system_message("test") + history.add_user_message("test") + + with patch.object(chat_completion, "_generate_next_token_async", return_value=generator_mock): + completed_text: ChatMessageContent = await chat_completion.get_chat_message_content( + prompt="test", chat_history=history, settings=OnnxGenAIPromptExecutionSettings(), kernel=Kernel() + ) + + assert str(completed_text) == "Hello" + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +async def test_onnx_chat_completion_streaming(gen_ai_config, model, tokenizer): + generator_mock = MagicMock() + generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] + + chat_completion = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, ai_model_path="test") + + history = ChatHistory() + history.add_system_message("test") + history.add_user_message("test") + + completed_text: str = "" + + with patch.object(chat_completion, "_generate_next_token_async", return_value=generator_mock): + async for chunk in chat_completion.get_streaming_chat_message_content( + prompt="test", chat_history=history, settings=OnnxGenAIPromptExecutionSettings(), kernel=Kernel() + ): + completed_text += str(chunk) + + assert completed_text == "Hello" + + +@patch("onnxruntime_genai.Model") +def test_onnx_chat_get_image_history(model): + builtin_open = open # save the unpatched version + + def patch_open(*args, **kwargs): + if "genai_config.json" in str(args[0]): + # mocked open for path "genai_config.json" + return mock_open(read_data=json.dumps(gen_ai_config_vision))(*args, **kwargs) + # unpatched version for every other path + return builtin_open(*args, **kwargs) + + with patch("builtins.open", patch_open): + chat_completion = OnnxGenAIChatCompletion( + template=ONNXTemplate.PHI3, + ai_model_path="test", + ) + + image_content = ImageContent.from_image_path( + image_path=os.path.join(os.path.dirname(__file__), "../../../../../", "assets/sample_image.jpg") + ) history = ChatHistory() history.add_system_message("test") history.add_user_message("test") + history.add_message( + ChatMessageContent( + role=AuthorRole.USER, + items=[image_content], + ), + ) + + last_image = chat_completion._get_images_from_history(history) + assert last_image == image_content + + +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +async def test_onnx_chat_get_image_history_with_not_multimodal(model, tokenizer): + builtin_open = open # save the unpatched version + + def patch_open(*args, **kwargs): + if "genai_config.json" in str(args[0]): + # mocked open for path "genai_config.json" + return mock_open(read_data=json.dumps(gen_ai_config))(*args, **kwargs) + # unpatched version for every other path + return builtin_open(*args, **kwargs) + + with patch("builtins.open", patch_open): + chat_completion = OnnxGenAIChatCompletion( + template=ONNXTemplate.PHI3, + ai_model_path="test", + ) + + image_content = ImageContent.from_image_path( + image_path=os.path.join(os.path.dirname(__file__), "../../../../../", "assets/sample_image.jpg") + ) - completed_text: str = "" - - with patch.object(chat_completion, 
"_generate_next_token_async", return_value=generator_mock): - async for chunk in chat_completion.get_streaming_chat_message_content( - prompt="test", chat_history=history, settings=OnnxGenAIPromptExecutionSettings(), kernel=Kernel() - ): - completed_text += str(chunk) - - assert completed_text == "Hello" - - @patch("onnxruntime_genai.Model") - def test_onnx_chat_get_image_history(self, model): - builtin_open = open # save the unpatched version - - def patch_open(*args, **kwargs): - if "genai_config.json" in str(args[0]): - # mocked open for path "genai_config.json" - return mock_open(read_data=json.dumps(gen_ai_config_vision))(*args, **kwargs) - # unpatched version for every other path - return builtin_open(*args, **kwargs) - - with patch("builtins.open", patch_open): - chat_completion = OnnxGenAIChatCompletion( - template=ONNXTemplate.PHI3, - ai_model_path="test", - ) - - image_content = ImageContent.from_image_path( - image_path=os.path.join(os.path.dirname(__file__), "../../../../../", "assets/sample_image.jpg") - ) - - history = ChatHistory() - history.add_system_message("test") - history.add_user_message("test") - history.add_message( - ChatMessageContent( - role=AuthorRole.USER, - items=[image_content], - ), - ) - - last_image = chat_completion._get_images_from_history(history) - assert last_image == image_content - - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - async def test_onnx_chat_get_image_history_with_not_multimodal(self, model, tokenizer): - builtin_open = open # save the unpatched version - - def patch_open(*args, **kwargs): - if "genai_config.json" in str(args[0]): - # mocked open for path "genai_config.json" - return mock_open(read_data=json.dumps(gen_ai_config))(*args, **kwargs) - # unpatched version for every other path - return builtin_open(*args, **kwargs) - - with patch("builtins.open", patch_open): - chat_completion = OnnxGenAIChatCompletion( - template=ONNXTemplate.PHI3, - ai_model_path="test", - ) - - image_content = ImageContent.from_image_path( - image_path=os.path.join(os.path.dirname(__file__), "../../../../../", "assets/sample_image.jpg") - ) - - history = ChatHistory() - history.add_system_message("test") - history.add_user_message("test") - history.add_message( - ChatMessageContent( - role=AuthorRole.USER, - items=[image_content], - ), - ) - - with pytest.raises(ServiceInvalidExecutionSettingsError): - _ = await chat_completion._get_images_from_history(history) + history = ChatHistory() + history.add_system_message("test") + history.add_user_message("test") + history.add_message( + ChatMessageContent( + role=AuthorRole.USER, + items=[image_content], + ), + ) + + with pytest.raises(ServiceInvalidExecutionSettingsError): + _ = await chat_completion._get_images_from_history(history) diff --git a/python/tests/unit/connectors/ai/onnx/services/test_onnx_text_completion.py b/python/tests/unit/connectors/ai/onnx/services/test_onnx_text_completion.py index 842c34f5cf3b..09435f02667f 100644 --- a/python/tests/unit/connectors/ai/onnx/services/test_onnx_text_completion.py +++ b/python/tests/unit/connectors/ai/onnx/services/test_onnx_text_completion.py @@ -1,77 +1,77 @@ # Copyright (c) Microsoft. All rights reserved. 
import json -import platform from unittest.mock import MagicMock, mock_open, patch import pytest +from semantic_kernel.connectors.ai.onnx import ( # noqa: E402 + OnnxGenAIPromptExecutionSettings, + OnnxGenAITextCompletion, +) from semantic_kernel.contents import TextContent from semantic_kernel.exceptions import ServiceInitializationError from tests.unit.connectors.ai.onnx.conftest import gen_ai_config -skip_on_mac_available = platform.system() == "Darwin" -if not skip_on_mac_available: - from semantic_kernel.connectors.ai.onnx import ( # noqa: E402 - OnnxGenAIPromptExecutionSettings, - OnnxGenAITextCompletion, - ) - - -@pytest.mark.skipif(skip_on_mac_available, reason="OnnxRuntime is not available on macOS") -class TestOnnxTextCompletion: - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_valid_env_variable(self, gen_ai_config, model, tokenizer, onnx_unit_test_env): - assert OnnxGenAITextCompletion(env_file_path="test.env") - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_valid_parameter(self, gen_ai_config, model, tokenizer): - assert OnnxGenAITextCompletion(ai_model_path="/valid_path") - - def test_onnx_chat_completion_with_invalid_model(self): - with pytest.raises(ServiceInitializationError): - OnnxGenAITextCompletion(ai_model_path="/invalid_path") - - def test_onnx_chat_completion_with_invalid_env_variable(self, onnx_unit_test_env): - with pytest.raises(ServiceInitializationError): - OnnxGenAITextCompletion() - - @pytest.mark.parametrize("exclude_list", [["ONNX_GEN_AI_TEXT_MODEL_FOLDER"]], indirect=True) - def test_onnx_chat_completion_with_missing_ai_path(self, onnx_unit_test_env): - with pytest.raises(ServiceInitializationError): - OnnxGenAITextCompletion(env_file_path="test.env") - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - async def test_onnx_text_completion(self, gen_ai_config, model, tokenizer): - generator_mock = MagicMock() - generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] - - text_completion = OnnxGenAITextCompletion(ai_model_path="test") - with patch.object(text_completion, "_generate_next_token_async", return_value=generator_mock): - completed_text: TextContent = await text_completion.get_text_content( - prompt="test", settings=OnnxGenAIPromptExecutionSettings() - ) - - assert completed_text.text == "Hello" - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - async def test_onnx_text_completion_streaming(self, gen_ai_config, model, tokenizer): - generator_mock = MagicMock() - generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] - - text_completion = OnnxGenAITextCompletion(ai_model_path="test") - completed_text: str = "" - with patch.object(text_completion, "_generate_next_token_async", return_value=generator_mock): - async for chunk in text_completion.get_streaming_text_content( - prompt="test", settings=OnnxGenAIPromptExecutionSettings() - ): - completed_text += chunk.text - - assert completed_text == "Hello" + +@patch("builtins.open", new_callable=mock_open, 
read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_valid_env_variable(gen_ai_config, model, tokenizer, onnx_unit_test_env): + assert OnnxGenAITextCompletion(env_file_path="test.env") + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_valid_parameter(gen_ai_config, model, tokenizer): + assert OnnxGenAITextCompletion(ai_model_path="/valid_path") + + +def test_onnx_chat_completion_with_invalid_model(): + with pytest.raises(ServiceInitializationError): + OnnxGenAITextCompletion(ai_model_path="/invalid_path") + + +def test_onnx_chat_completion_with_invalid_env_variable(onnx_unit_test_env): + with pytest.raises(ServiceInitializationError): + OnnxGenAITextCompletion() + + +@pytest.mark.parametrize("exclude_list", [["ONNX_GEN_AI_TEXT_MODEL_FOLDER"]], indirect=True) +def test_onnx_chat_completion_with_missing_ai_path(onnx_unit_test_env): + with pytest.raises(ServiceInitializationError): + OnnxGenAITextCompletion(env_file_path="test.env") + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +async def test_onnx_text_completion(gen_ai_config, model, tokenizer): + generator_mock = MagicMock() + generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] + + text_completion = OnnxGenAITextCompletion(ai_model_path="test") + with patch.object(text_completion, "_generate_next_token_async", return_value=generator_mock): + completed_text: TextContent = await text_completion.get_text_content( + prompt="test", settings=OnnxGenAIPromptExecutionSettings() + ) + + assert completed_text.text == "Hello" + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +async def test_onnx_text_completion_streaming(gen_ai_config, model, tokenizer): + generator_mock = MagicMock() + generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] + + text_completion = OnnxGenAITextCompletion(ai_model_path="test") + completed_text: str = "" + with patch.object(text_completion, "_generate_next_token_async", return_value=generator_mock): + async for chunk in text_completion.get_streaming_text_content( + prompt="test", settings=OnnxGenAIPromptExecutionSettings() + ): + completed_text += chunk.text + + assert completed_text == "Hello" diff --git a/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py b/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py index e746a1624a01..b4cb1e281826 100644 --- a/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py +++ b/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py @@ -1,97 +1,93 @@ # Copyright (c) Microsoft. All rights reserved. 
-import platform +from semantic_kernel.connectors.ai.onnx.utils import ( + gemma_template, + llama_template, + phi3_template, + phi3v_template, +) +from semantic_kernel.contents import AuthorRole, ChatHistory, ImageContent, TextContent -import pytest -from semantic_kernel.contents import AuthorRole, ChatHistory, ImageContent, TextContent +def test_phi3v_template_with_text_and_image(): + history = ChatHistory( + messages=[ + {"role": AuthorRole.SYSTEM, "content": "System message"}, + { + "role": AuthorRole.USER, + "items": [TextContent(text="User text message"), ImageContent(url="http://example.com/image.png")], + }, + {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, + ] + ) + + expected_output = ( + "<|system|>\nSystem message<|end|>\n" + "<|user|>\nUser text message<|end|>\n" + "<|image_1|>\n" + "<|assistant|>\nAssistant message<|end|>\n" + "<|assistant|>\n" + ) + + assert phi3v_template(history) == expected_output + -skip_on_mac_available = platform.system() == "Darwin" -if not skip_on_mac_available: - from semantic_kernel.connectors.ai.onnx.utils import ( # noqa: E402 - gemma_template, - llama_template, - phi3_template, - phi3v_template, +def test_phi3_template_with_only_text(): + history = ChatHistory(messages=[{"role": AuthorRole.USER, "items": [TextContent(text="User text message")]}]) + + expected_output = "<|user|>\nUser text message<|end|>\n<|assistant|>\n" + + assert phi3_template(history) == expected_output + + +def test_gemma_template_with_user_and_assistant_messages(): + history = ChatHistory( + messages=[ + {"role": AuthorRole.USER, "content": "User text message"}, + {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, + ] ) + expected_output = ( + "" + "user\nUser text message\n" + "model\nAssistant message\n" + "model\n" + ) + + assert gemma_template(history) == expected_output + + +def test_gemma_template_with_only_user_message(): + history = ChatHistory(messages=[{"role": AuthorRole.USER, "content": "User text message"}]) + + expected_output = "user\nUser text message\nmodel\n" + + assert gemma_template(history) == expected_output + + +def test_llama_template_with_user_and_assistant_messages(): + history = ChatHistory( + messages=[ + {"role": AuthorRole.USER, "content": "User text message"}, + {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, + ] + ) + + expected_output = ( + "<|start_header_id|>user<|end_header_id|>\n\nUser text message<|eot_id|>" + "<|start_header_id|>assistant<|end_header_id|>\n\nAssistant message<|eot_id|>" + "<|start_header_id|>assistant<|end_header_id|>" + ) + + assert llama_template(history) == expected_output + + +def test_llama_template_with_only_user_message(): + history = ChatHistory(messages=[{"role": AuthorRole.USER, "content": "User text message"}]) + + expected_output = ( + "<|start_header_id|>user<|end_header_id|>\n\nUser text message<|eot_id|>" + "<|start_header_id|>assistant<|end_header_id|>" + ) -@pytest.mark.skipif(skip_on_mac_available, reason="OnnxRuntime is not available on macOS") -class TestOnnxUtils: - def test_phi3v_template_with_text_and_image(self): - history = ChatHistory( - messages=[ - {"role": AuthorRole.SYSTEM, "content": "System message"}, - { - "role": AuthorRole.USER, - "items": [TextContent(text="User text message"), ImageContent(url="http://example.com/image.png")], - }, - {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, - ] - ) - - expected_output = ( - "<|system|>\nSystem message<|end|>\n" - "<|user|>\nUser text message<|end|>\n" - "<|image_1|>\n" - 
"<|assistant|>\nAssistant message<|end|>\n" - "<|assistant|>\n" - ) - - assert phi3v_template(history) == expected_output - - def test_phi3_template_with_only_text(self): - history = ChatHistory(messages=[{"role": AuthorRole.USER, "items": [TextContent(text="User text message")]}]) - - expected_output = "<|user|>\nUser text message<|end|>\n<|assistant|>\n" - - assert phi3_template(history) == expected_output - - def test_gemma_template_with_user_and_assistant_messages(self): - history = ChatHistory( - messages=[ - {"role": AuthorRole.USER, "content": "User text message"}, - {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, - ] - ) - - expected_output = ( - "" - "user\nUser text message\n" - "model\nAssistant message\n" - "model\n" - ) - - assert gemma_template(history) == expected_output - - def test_gemma_template_with_only_user_message(self): - history = ChatHistory(messages=[{"role": AuthorRole.USER, "content": "User text message"}]) - - expected_output = "user\nUser text message\nmodel\n" - - assert gemma_template(history) == expected_output - - def test_llama_template_with_user_and_assistant_messages(self): - history = ChatHistory( - messages=[ - {"role": AuthorRole.USER, "content": "User text message"}, - {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, - ] - ) - - expected_output = ( - "<|start_header_id|>user<|end_header_id|>\n\nUser text message<|eot_id|>" - "<|start_header_id|>assistant<|end_header_id|>\n\nAssistant message<|eot_id|>" - "<|start_header_id|>assistant<|end_header_id|>" - ) - - assert llama_template(history) == expected_output - - def test_llama_template_with_only_user_message(self): - history = ChatHistory(messages=[{"role": AuthorRole.USER, "content": "User text message"}]) - - expected_output = ( - "<|start_header_id|>user<|end_header_id|>\n\nUser text message<|eot_id|>" - "<|start_header_id|>assistant<|end_header_id|>" - ) - - assert llama_template(history) == expected_output + assert llama_template(history) == expected_output diff --git a/python/tests/unit/connectors/ai/onnx/test_onnx_prompt_execution_settings.py b/python/tests/unit/connectors/ai/onnx/test_onnx_prompt_execution_settings.py index 80c84619d0a2..3d6942ecb8a5 100644 --- a/python/tests/unit/connectors/ai/onnx/test_onnx_prompt_execution_settings.py +++ b/python/tests/unit/connectors/ai/onnx/test_onnx_prompt_execution_settings.py @@ -1,85 +1,84 @@ # Copyright (c) Microsoft. All rights reserved. 
-import platform - import pytest from pydantic import ValidationError +from semantic_kernel.connectors.ai.onnx.onnx_gen_ai_prompt_execution_settings import ( + OnnxGenAIPromptExecutionSettings, +) from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings -skip_on_mac_available = platform.system() == "Darwin" -if not skip_on_mac_available: - from semantic_kernel.connectors.ai.onnx.onnx_gen_ai_prompt_execution_settings import ( - OnnxGenAIPromptExecutionSettings, + +def test_default_onnx_chat_prompt_execution_settings(): + settings = OnnxGenAIPromptExecutionSettings() + assert settings.temperature is None + assert settings.top_p is None + + +def test_custom_onnx_chat_prompt_execution_settings(): + settings = OnnxGenAIPromptExecutionSettings( + temperature=0.5, + top_p=0.5, + max_length=128, ) + assert settings.temperature == 0.5 + assert settings.top_p == 0.5 + assert settings.max_length == 128 -@pytest.mark.skipif(skip_on_mac_available, reason="OnnxRuntime is not available on macOS") -class TestOnnxPromptExecutionSettings: - def test_default_onnx_chat_prompt_execution_settings(self): - settings = OnnxGenAIPromptExecutionSettings() - assert settings.temperature is None - assert settings.top_p is None +def test_onnx_chat_prompt_execution_settings_from_default_completion_config(): + settings = PromptExecutionSettings(service_id="test_service") + chat_settings = OnnxGenAIPromptExecutionSettings.from_prompt_execution_settings(settings) + assert chat_settings.service_id == "test_service" + assert chat_settings.temperature is None + assert chat_settings.top_p is None - def test_custom_onnx_chat_prompt_execution_settings(self): - settings = OnnxGenAIPromptExecutionSettings( - temperature=0.5, - top_p=0.5, - max_length=128, - ) - assert settings.temperature == 0.5 - assert settings.top_p == 0.5 - assert settings.max_length == 128 - def test_onnx_chat_prompt_execution_settings_from_default_completion_config(self): - settings = PromptExecutionSettings(service_id="test_service") - chat_settings = OnnxGenAIPromptExecutionSettings.from_prompt_execution_settings(settings) - assert chat_settings.service_id == "test_service" - assert chat_settings.temperature is None - assert chat_settings.top_p is None +def test_onnx_chat_prompt_execution_settings_from_onnx_prompt_execution_settings(): + chat_settings = OnnxGenAIPromptExecutionSettings(service_id="test_service", temperature=1.0) + new_settings = OnnxGenAIPromptExecutionSettings(service_id="test_2", temperature=0.0) + chat_settings.update_from_prompt_execution_settings(new_settings) + assert chat_settings.service_id == "test_2" + assert chat_settings.temperature == 0.0 - def test_onnx_chat_prompt_execution_settings_from_onnx_prompt_execution_settings(self): - chat_settings = OnnxGenAIPromptExecutionSettings(service_id="test_service", temperature=1.0) - new_settings = OnnxGenAIPromptExecutionSettings(service_id="test_2", temperature=0.0) - chat_settings.update_from_prompt_execution_settings(new_settings) - assert chat_settings.service_id == "test_2" - assert chat_settings.temperature == 0.0 - def test_onnx_chat_prompt_execution_settings_from_custom_completion_config(self): - settings = PromptExecutionSettings( - service_id="test_service", - extension_data={ - "temperature": 0.5, - "top_p": 0.5, - "max_length": 128, - }, - ) - chat_settings = OnnxGenAIPromptExecutionSettings.from_prompt_execution_settings(settings) - assert chat_settings.temperature == 0.5 - assert chat_settings.top_p == 0.5 - assert chat_settings.max_length 
== 128 +def test_onnx_chat_prompt_execution_settings_from_custom_completion_config(): + settings = PromptExecutionSettings( + service_id="test_service", + extension_data={ + "temperature": 0.5, + "top_p": 0.5, + "max_length": 128, + }, + ) + chat_settings = OnnxGenAIPromptExecutionSettings.from_prompt_execution_settings(settings) + assert chat_settings.temperature == 0.5 + assert chat_settings.top_p == 0.5 + assert chat_settings.max_length == 128 + - def test_create_options(self): - settings = OnnxGenAIPromptExecutionSettings( +def test_create_options(): + settings = OnnxGenAIPromptExecutionSettings( + service_id="test_service", + extension_data={ + "temperature": 0.5, + "top_p": 0.5, + "max_length": 128, + }, + ) + options = settings.prepare_settings_dict() + assert options["temperature"] == 0.5 + assert options["top_p"] == 0.5 + assert options["max_length"] == 128 + + +def test_create_options_with_wrong_parameter(): + with pytest.raises(ValidationError): + OnnxGenAIPromptExecutionSettings( service_id="test_service", + function_choice_behavior="auto", extension_data={ - "temperature": 0.5, + "temperature": 10.0, "top_p": 0.5, "max_length": 128, }, ) - options = settings.prepare_settings_dict() - assert options["temperature"] == 0.5 - assert options["top_p"] == 0.5 - assert options["max_length"] == 128 - - def test_create_options_with_wrong_parameter(self): - with pytest.raises(ValidationError): - OnnxGenAIPromptExecutionSettings( - service_id="test_service", - function_choice_behavior="auto", - extension_data={ - "temperature": 10.0, - "top_p": 0.5, - "max_length": 128, - }, - ) diff --git a/python/uv.lock b/python/uv.lock index 471f32675a40..e359cea177f1 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -2694,6 +2694,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/7f/7f/7fbae15a3982dc9595e49ce0f19332423b260045d0a6afe93cdbe2f1f624/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0f8aa1706812e00b9f19dfe0cdb3999b092ccb8ca168c0db5b8ea712456fd9b3", size = 363333771 }, { url = "https://files.pythonhosted.org/packages/ae/71/1c91302526c45ab494c23f61c7a84aa568b8c1f9d196efa5993957faf906/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2fc8da60df463fdefa81e323eef2e36489e1c94335b5358bcb38360adf75ac9b", size = 363438805 }, + { url = "https://files.pythonhosted.org/packages/e2/2a/4f27ca96232e8b5269074a72e03b4e0d43aa68c9b965058b1684d07c6ff8/nvidia_cublas_cu12-12.4.5.8-py3-none-win_amd64.whl", hash = "sha256:5a796786da89203a0657eda402bcdcec6180254a8ac22d72213abc42069522dc", size = 396895858 }, ] [[package]] @@ -2703,6 +2704,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/93/b5/9fb3d00386d3361b03874246190dfec7b206fd74e6e287b26a8fcb359d95/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:79279b35cf6f91da114182a5ce1864997fd52294a87a16179ce275773799458a", size = 12354556 }, { url = "https://files.pythonhosted.org/packages/67/42/f4f60238e8194a3106d06a058d494b18e006c10bb2b915655bd9f6ea4cb1/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9dec60f5ac126f7bb551c055072b69d85392b13311fcc1bcda2202d172df30fb", size = 13813957 }, + { url = "https://files.pythonhosted.org/packages/f3/79/8cf313ec17c58ccebc965568e5bcb265cdab0a1df99c4e674bb7a3b99bfe/nvidia_cuda_cupti_cu12-12.4.127-py3-none-win_amd64.whl", hash = 
"sha256:5688d203301ab051449a2b1cb6690fbe90d2b372f411521c86018b950f3d7922", size = 9938035 }, ] [[package]] @@ -2712,6 +2714,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/77/aa/083b01c427e963ad0b314040565ea396f914349914c298556484f799e61b/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0eedf14185e04b76aa05b1fea04133e59f465b6f960c0cbf4e37c3cb6b0ea198", size = 24133372 }, { url = "https://files.pythonhosted.org/packages/2c/14/91ae57cd4db3f9ef7aa99f4019cfa8d54cb4caa7e00975df6467e9725a9f/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a178759ebb095827bd30ef56598ec182b85547f1508941a3d560eb7ea1fbf338", size = 24640306 }, + { url = "https://files.pythonhosted.org/packages/7c/30/8c844bfb770f045bcd8b2c83455c5afb45983e1a8abf0c4e5297b481b6a5/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:a961b2f1d5f17b14867c619ceb99ef6fcec12e46612711bcec78eb05068a60ec", size = 19751955 }, ] [[package]] @@ -2721,6 +2724,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/a1/aa/b656d755f474e2084971e9a297def515938d56b466ab39624012070cb773/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:961fe0e2e716a2a1d967aab7caee97512f71767f852f67432d572e36cb3a11f3", size = 894177 }, { url = "https://files.pythonhosted.org/packages/ea/27/1795d86fe88ef397885f2e580ac37628ed058a92ed2c39dc8eac3adf0619/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:64403288fa2136ee8e467cdc9c9427e0434110899d07c779f25b5c068934faa5", size = 883737 }, + { url = "https://files.pythonhosted.org/packages/a8/8b/450e93fab75d85a69b50ea2d5fdd4ff44541e0138db16f9cd90123ef4de4/nvidia_cuda_runtime_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:09c2e35f48359752dfa822c09918211844a3d93c100a715d79b59591130c5e1e", size = 878808 }, ] [[package]] @@ -2732,6 +2736,7 @@ dependencies = [ ] wheels = [ { url = "https://files.pythonhosted.org/packages/9f/fd/713452cd72343f682b1c7b9321e23829f00b842ceaedcda96e742ea0b0b3/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f", size = 664752741 }, + { url = "https://files.pythonhosted.org/packages/3f/d0/f90ee6956a628f9f04bf467932c0a25e5a7e706a684b896593c06c82f460/nvidia_cudnn_cu12-9.1.0.70-py3-none-win_amd64.whl", hash = "sha256:6278562929433d68365a07a4a1546c237ba2849852c0d4b2262a486e805b977a", size = 679925892 }, ] [[package]] @@ -2744,6 +2749,7 @@ dependencies = [ wheels = [ { url = "https://files.pythonhosted.org/packages/7a/8a/0e728f749baca3fbeffad762738276e5df60851958be7783af121a7221e7/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5dad8008fc7f92f5ddfa2101430917ce2ffacd86824914c82e28990ad7f00399", size = 211422548 }, { url = "https://files.pythonhosted.org/packages/27/94/3266821f65b92b3138631e9c8e7fe1fb513804ac934485a8d05776e1dd43/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f083fc24912aa410be21fa16d157fed2055dab1cc4b6934a0e03cba69eb242b9", size = 211459117 }, + { url = "https://files.pythonhosted.org/packages/f6/ee/3f3f8e9874f0be5bbba8fb4b62b3de050156d159f8b6edc42d6f1074113b/nvidia_cufft_cu12-11.2.1.3-py3-none-win_amd64.whl", hash = "sha256:d802f4954291101186078ccbe22fc285a902136f974d369540fd4a5333d1440b", size = 210576476 }, ] [[package]] @@ -2753,6 +2759,7 @@ source = { registry 
= "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/80/9c/a79180e4d70995fdf030c6946991d0171555c6edf95c265c6b2bf7011112/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1f173f09e3e3c76ab084aba0de819c49e56614feae5c12f69883f4ae9bb5fad9", size = 56314811 }, { url = "https://files.pythonhosted.org/packages/8a/6d/44ad094874c6f1b9c654f8ed939590bdc408349f137f9b98a3a23ccec411/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a88f583d4e0bb643c49743469964103aa59f7f708d862c3ddb0fc07f851e3b8b", size = 56305206 }, + { url = "https://files.pythonhosted.org/packages/1c/22/2573503d0d4e45673c263a313f79410e110eb562636b0617856fdb2ff5f6/nvidia_curand_cu12-10.3.5.147-py3-none-win_amd64.whl", hash = "sha256:f307cc191f96efe9e8f05a87096abc20d08845a841889ef78cb06924437f6771", size = 55799918 }, ] [[package]] @@ -2767,6 +2774,7 @@ dependencies = [ wheels = [ { url = "https://files.pythonhosted.org/packages/46/6b/a5c33cf16af09166845345275c34ad2190944bcc6026797a39f8e0a282e0/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d338f155f174f90724bbde3758b7ac375a70ce8e706d70b018dd3375545fc84e", size = 127634111 }, { url = "https://files.pythonhosted.org/packages/3a/e1/5b9089a4b2a4790dfdea8b3a006052cfecff58139d5a4e34cb1a51df8d6f/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:19e33fa442bcfd085b3086c4ebf7e8debc07cfe01e11513cc6d332fd918ac260", size = 127936057 }, + { url = "https://files.pythonhosted.org/packages/f2/be/d435b7b020e854d5d5a682eb5de4328fd62f6182507406f2818280e206e2/nvidia_cusolver_cu12-11.6.1.9-py3-none-win_amd64.whl", hash = "sha256:e77314c9d7b694fcebc84f58989f3aa4fb4cb442f12ca1a9bde50f5e8f6d1b9c", size = 125224015 }, ] [[package]] @@ -2779,6 +2787,7 @@ dependencies = [ wheels = [ { url = "https://files.pythonhosted.org/packages/96/a9/c0d2f83a53d40a4a41be14cea6a0bf9e668ffcf8b004bd65633f433050c0/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_aarch64.whl", hash = "sha256:9d32f62896231ebe0480efd8a7f702e143c98cfaa0e8a76df3386c1ba2b54df3", size = 207381987 }, { url = "https://files.pythonhosted.org/packages/db/f7/97a9ea26ed4bbbfc2d470994b8b4f338ef663be97b8f677519ac195e113d/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ea4f11a2904e2a8dc4b1833cc1b5181cde564edd0d5cd33e3c168eff2d1863f1", size = 207454763 }, + { url = "https://files.pythonhosted.org/packages/a2/e0/3155ca539760a8118ec94cc279b34293309bcd14011fc724f87f31988843/nvidia_cusparse_cu12-12.3.1.170-py3-none-win_amd64.whl", hash = "sha256:9bc90fb087bc7b4c15641521f31c0371e9a612fc2ba12c338d3ae032e6b6797f", size = 204684315 }, ] [[package]] @@ -2796,6 +2805,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/02/45/239d52c05074898a80a900f49b1615d81c07fceadd5ad6c4f86a987c0bc4/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4abe7fef64914ccfa909bc2ba39739670ecc9e820c83ccc7a6ed414122599b83", size = 20552510 }, { url = "https://files.pythonhosted.org/packages/ff/ff/847841bacfbefc97a00036e0fce5a0f086b640756dc38caea5e1bb002655/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57", size = 21066810 }, + { url = "https://files.pythonhosted.org/packages/81/19/0babc919031bee42620257b9a911c528f05fb2688520dcd9ca59159ffea8/nvidia_nvjitlink_cu12-12.4.127-py3-none-win_amd64.whl", hash = 
"sha256:fd9020c501d27d135f983c6d3e244b197a7ccad769e34df53a42e276b0e25fa1", size = 95336325 }, ] [[package]] @@ -2805,6 +2815,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/06/39/471f581edbb7804b39e8063d92fc8305bdc7a80ae5c07dbe6ea5c50d14a5/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7959ad635db13edf4fc65c06a6e9f9e55fc2f92596db928d169c0bb031e88ef3", size = 100417 }, { url = "https://files.pythonhosted.org/packages/87/20/199b8713428322a2f22b722c62b8cc278cc53dffa9705d744484b5035ee9/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a", size = 99144 }, + { url = "https://files.pythonhosted.org/packages/54/1b/f77674fbb73af98843be25803bbd3b9a4f0a96c75b8d33a2854a5c7d2d77/nvidia_nvtx_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:641dccaaa1139f3ffb0d3164b4b84f9d253397e38246a4f2f36728b48566d485", size = 66307 }, ] [[package]] @@ -2874,11 +2885,17 @@ dependencies = [ { name = "onnxruntime", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/52/35/22a421f852eb14f47c33a4dd4c3ef58a2f3d5a96be8bb6d6cc271b2a0e83/onnxruntime_genai-0.5.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:cd322ead0027fbfa309e7be76c4512157ad369dc189ab3334a58a199b4f58a02", size = 769921 }, + { url = "https://files.pythonhosted.org/packages/7f/1b/5166ed4a73c5e9f92e6db4d7838923ffd595cea164661fae20d82e3a6966/onnxruntime_genai-0.5.2-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:79d721a33e80a9664aeeb87c0ceec75801fc81e48e8ff7940e3658d0b28f25cc", size = 869111 }, { url = "https://files.pythonhosted.org/packages/12/5b/6f08f9435f0c3977046cb4292ab1e836c22cd7d56fc87ace4d2a90dfb828/onnxruntime_genai-0.5.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bd7954f9dc829e69dabd7f676443529ac18171ec8077438c16364d381733070e", size = 1380370 }, { url = "https://files.pythonhosted.org/packages/57/d6/91e486424f924c2a99e8f1bd201180979101ecc09bee1ca7f53dae1c8a38/onnxruntime_genai-0.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:4d2968df6d8064664a5f095006c70520f4ca689204b695e88951f088477bc1e0", size = 776263 }, + { url = "https://files.pythonhosted.org/packages/3e/3d/e2d8f89c05c6cf35e2ade2b335b1b97725327591b8fb141d266ab98615f9/onnxruntime_genai-0.5.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:217c748f8ebd1a5082e1ad8ee8fc90fc1a4e9ce7839189f4c2c2545d1390af15", size = 769888 }, + { url = "https://files.pythonhosted.org/packages/33/13/66ffa143cc82f8352ec87ba0501bc21e05dd9e84fbbad530e74a705ac911/onnxruntime_genai-0.5.2-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:6194aabd589b3ffb571b325f504266ac47c33c434abfd87575c30d7a3e1179c9", size = 869092 }, { url = "https://files.pythonhosted.org/packages/6a/17/a29c0cf89d90374234b8e510fcb970f2e043b42689b5ea23cbdab5a414b6/onnxruntime_genai-0.5.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:88edb36c9e2d670316f1e6e4ce27a86f212648a92053a94a31f88b1f4d6c0935", size = 1380461 }, { url = "https://files.pythonhosted.org/packages/59/b1/acb1daf1a08c8098c828e7ea9e187b9728a8fc151a4df4911f988c08a874/onnxruntime_genai-0.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:09b697f955616156948f21366d13d02884a15521926f68a259722d9fa4437db4", size = 776308 }, { url = 
"https://files.pythonhosted.org/packages/22/57/d249827c3e37abe528674bfa97de4c61b18afb452d2afced690a745e0866/onnxruntime_genai-0.5.2-cp311-cp311-win_arm64.whl", hash = "sha256:893be15d2113438e60b8a1c0095892e0fd4f2b01dd470d6197337db2a5778c88", size = 751552 }, + { url = "https://files.pythonhosted.org/packages/cf/72/259de19e93e72b14d0a3910f1025f71da006a8dfc76c97792646b335a8a3/onnxruntime_genai-0.5.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:6b438d7f4901081b8f3ff99db6c6ea15a3fcc107abce79859ff635e1278e26b0", size = 771097 }, + { url = "https://files.pythonhosted.org/packages/8c/72/73c95e357ada258025236437fb2b4d56fb7e8594db6361f4560ea97ca06c/onnxruntime_genai-0.5.2-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:d7bffb799d44656b2615fc43130a1a287d57e8893b80523e560924cf05770f1d", size = 871450 }, { url = "https://files.pythonhosted.org/packages/79/3d/43211c8a66d7ce54dea137ad7bec30767e3f2dc5e1e22befdcca290ebbe0/onnxruntime_genai-0.5.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bb5b9650512e21a71d965e582d616b33df07978b0c3ecbd5bef0912a7b5f7832", size = 1380898 }, { url = "https://files.pythonhosted.org/packages/9f/7b/53b217ed0db401877fafa2f63d2ce7de754899f2bdf4cb415931e2019f18/onnxruntime_genai-0.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:422e9af27f182247378e9423f5745becfaffcdf7a4f452da17fd5d9390770ca7", size = 776974 }, { url = "https://files.pythonhosted.org/packages/08/c1/a69aeba29f40febd8d70d45044d4eb97905beb37fc8491b1628c8714ecc1/onnxruntime_genai-0.5.2-cp312-cp312-win_arm64.whl", hash = "sha256:315b23cb04749202c9cc3eb34f281bb4943de477a5aa46c99b940603b6a5d272", size = 751246 }, @@ -3559,6 +3576,8 @@ version = "6.1.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/26/10/2a30b13c61e7cf937f4adf90710776b7918ed0a9c434e2c38224732af310/psutil-6.1.0.tar.gz", hash = "sha256:353815f59a7f64cdaca1c0307ee13558a0512f6db064e92fe833784f08539c7a", size = 508565 } wheels = [ + { url = "https://files.pythonhosted.org/packages/da/2b/f4dea5d993d9cd22ad958eea828a41d5d225556123d372f02547c29c4f97/psutil-6.1.0-cp27-none-win32.whl", hash = "sha256:9118f27452b70bb1d9ab3198c1f626c2499384935aaf55388211ad982611407e", size = 246648 }, + { url = "https://files.pythonhosted.org/packages/9f/14/4aa97a7f2e0ac33a050d990ab31686d651ae4ef8c86661fef067f00437b9/psutil-6.1.0-cp27-none-win_amd64.whl", hash = "sha256:a8506f6119cff7015678e2bce904a4da21025cc70ad283a53b099e7620061d85", size = 249905 }, { url = "https://files.pythonhosted.org/packages/01/9e/8be43078a171381953cfee33c07c0d628594b5dbfc5157847b85022c2c1b/psutil-6.1.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6e2dcd475ce8b80522e51d923d10c7871e45f20918e027ab682f94f1c6351688", size = 247762 }, { url = "https://files.pythonhosted.org/packages/1d/cb/313e80644ea407f04f6602a9e23096540d9dc1878755f3952ea8d3d104be/psutil-6.1.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:0895b8414afafc526712c498bd9de2b063deaac4021a3b3c34566283464aff8e", size = 248777 }, { url = "https://files.pythonhosted.org/packages/65/8e/bcbe2025c587b5d703369b6a75b65d41d1367553da6e3f788aff91eaf5bd/psutil-6.1.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9dcbfce5d89f1d1f2546a2090f4fcf87c7f669d1d90aacb7d7582addece9fb38", size = 284259 }, @@ -4803,7 +4822,7 @@ ollama = [ { name = "ollama", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, ] onnx = [ - { name = "onnxruntime-genai", marker = 
"(platform_system != 'Darwin' and sys_platform == 'darwin') or (platform_system != 'Darwin' and sys_platform == 'linux') or (platform_system != 'Darwin' and sys_platform == 'win32')" }, + { name = "onnxruntime-genai", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, ] pandas = [ { name = "pandas", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, @@ -4874,7 +4893,7 @@ requires-dist = [ { name = "numpy", marker = "python_full_version < '3.12'", specifier = ">=1.25.0" }, { name = "numpy", marker = "python_full_version >= '3.12'", specifier = ">=1.26.0" }, { name = "ollama", marker = "extra == 'ollama'", specifier = "~=0.4" }, - { name = "onnxruntime-genai", marker = "platform_system != 'Darwin' and extra == 'onnx'", specifier = "~=0.4" }, + { name = "onnxruntime-genai", marker = "extra == 'onnx'", specifier = "~=0.5" }, { name = "openai", specifier = "~=1.0" }, { name = "openapi-core", specifier = ">=0.18,<0.20" }, { name = "opentelemetry-api", specifier = "~=1.24" },