From 8b389b8231097c60978b6fd196b7d1a776602050 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nico=20M=C3=B6ller?=
Date: Thu, 19 Dec 2024 10:34:05 +0100
Subject: [PATCH] Python: Upgrade Minimum Onnx Version to enable MacOS Unit Tests (#9981)

Closes: #9979

- Enabled the test cases for macOS
- Upgraded the uv.lock file

### Motivation and Context

Version 0.4.0 did not provide a pip package for macOS, which forced us to disable the unit tests on macOS.
With version 0.5.0 now available, we can enable the unit tests for macOS.

Using version 0.5.0 enables the following features for users:
1. Phi3.5 and Phi3.5 MoE
2. macOS support without building the code from source
3. LoRA adapter swapping

### Contribution Checklist

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone :smile:

---------

Co-authored-by: Eduard van Valkenburg
---
 python/pyproject.toml | 5 +-
 .../completions/chat_completion_test_base.py | 10 +-
 ...completion_with_image_input_text_output.py | 5 +-
 .../completions/test_chat_completions.py | 5 +-
 .../completions/test_text_completion.py | 14 +-
 .../services/test_onnx_chat_completion.py | 327 +++++++++---------
 .../services/test_onnx_text_completion.py | 134 +++----
 .../ai/onnx/services/test_onnx_utils.py | 176 +++++-----
 .../test_onnx_prompt_execution_settings.py | 129 ++++---
 python/uv.lock | 23 +-
 10 files changed, 421 insertions(+), 407 deletions(-)

diff --git a/python/pyproject.toml b/python/pyproject.toml
index 3dd580cdf470..bf30da7ef96e 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -86,7 +86,7 @@ ollama = [
     "ollama ~= 0.4"
 ]
 onnx = [
-    "onnxruntime-genai ~= 0.4; platform_system != 'Darwin'"
+    "onnxruntime-genai ~= 0.5"
 ]
 anthropic = [
     "anthropic ~= 0.32"
 ]
@@ -156,7 +156,8 @@ filterwarnings = [
 ]
 timeout = 120
 markers = [
-    "ollama: mark a test as requiring the Ollama service (use \"not ollama\" to skip those tests)"
+    "ollama: mark a test as requiring the Ollama service (use \"not ollama\" to skip those tests)",
+    "onnx: mark a test as requiring the Onnx service (use \"not onnx\" to skip those tests)"
 ]
 
 [tool.ruff]
diff --git a/python/tests/integration/completions/chat_completion_test_base.py b/python/tests/integration/completions/chat_completion_test_base.py
index d05157e607c5..7a4db5b8fcfe 100644
--- a/python/tests/integration/completions/chat_completion_test_base.py
+++ b/python/tests/integration/completions/chat_completion_test_base.py
@@ -2,7 +2,6 @@
 
 import os
-import platform
 import sys
 from typing import Annotated
 
@@ -22,6 +21,7 @@ from semantic_kernel.connectors.ai.google.vertex_ai import VertexAIChatCompletion, VertexAIChatPromptExecutionSettings
 from semantic_kernel.connectors.ai.mistral_ai import MistralAIChatCompletion, MistralAIChatPromptExecutionSettings
 from semantic_kernel.connectors.ai.ollama import OllamaChatCompletion, OllamaChatPromptExecutionSettings
+from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings, ONNXTemplate
 from semantic_kernel.connectors.ai.open_ai import (
     AzureChatCompletion,
     AzureChatPromptExecutionSettings,
@@ -71,12 +71,6 @@ bedrock_setup: bool = is_service_setup_for_testing(["AWS_DEFAULT_REGION"],
raise_if_not_set=False) -skip_on_mac_available = platform.system() == "Darwin" -if not skip_on_mac_available: - from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings - from semantic_kernel.connectors.ai.onnx.utils import ONNXTemplate - - # A mock plugin that contains a function that returns a complex object. class PersonDetails(KernelBaseModel): id: str @@ -155,7 +149,7 @@ def services(self) -> dict[str, tuple[ServiceType | None, type[PromptExecutionSe "vertex_ai": (VertexAIChatCompletion() if vertex_ai_setup else None, VertexAIChatPromptExecutionSettings), "onnx_gen_ai": ( OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3V) if onnx_setup else None, - OnnxGenAIPromptExecutionSettings if not skip_on_mac_available else None, + OnnxGenAIPromptExecutionSettings, ), "bedrock_amazon_titan": ( BedrockChatCompletion(model_id="amazon.titan-text-premier-v1:0") if bedrock_setup else None, diff --git a/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py b/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py index f45f3367268c..2eb52e22f1e5 100644 --- a/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py +++ b/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py @@ -118,7 +118,10 @@ ChatMessageContent(role=AuthorRole.USER, items=[TextContent(text="Where was it made?")]), ], {}, - marks=pytest.mark.skipif(not onnx_setup, reason="Need a Onnx Model setup"), + marks=( + pytest.mark.skipif(not onnx_setup, reason="Need a Onnx Model setup"), + pytest.mark.onnx, + ), id="onnx_gen_ai_image_input_file", ), pytest.param( diff --git a/python/tests/integration/completions/test_chat_completions.py b/python/tests/integration/completions/test_chat_completions.py index e3a77542f0f6..67eab08b0a90 100644 --- a/python/tests/integration/completions/test_chat_completions.py +++ b/python/tests/integration/completions/test_chat_completions.py @@ -150,7 +150,10 @@ class Reasoning(KernelBaseModel): ChatMessageContent(role=AuthorRole.USER, items=[TextContent(text="How are you today?")]), ], {}, - marks=pytest.mark.skipif(not onnx_setup, reason="Need a Onnx Model setup"), + marks=( + pytest.mark.skipif(not onnx_setup, reason="Need a Onnx Model setup"), + pytest.mark.onnx, + ), id="onnx_gen_ai", ), # endregion diff --git a/python/tests/integration/completions/test_text_completion.py b/python/tests/integration/completions/test_text_completion.py index 3e9b34ef76aa..7e6792de4465 100644 --- a/python/tests/integration/completions/test_text_completion.py +++ b/python/tests/integration/completions/test_text_completion.py @@ -1,6 +1,5 @@ # Copyright (c) Microsoft. All rights reserved. 
-import platform import sys from functools import partial from typing import Any @@ -19,6 +18,7 @@ from semantic_kernel.connectors.ai.google.vertex_ai import VertexAITextCompletion, VertexAITextPromptExecutionSettings from semantic_kernel.connectors.ai.hugging_face import HuggingFacePromptExecutionSettings, HuggingFaceTextCompletion from semantic_kernel.connectors.ai.ollama import OllamaTextCompletion, OllamaTextPromptExecutionSettings +from semantic_kernel.connectors.ai.onnx import OnnxGenAIPromptExecutionSettings, OnnxGenAITextCompletion from semantic_kernel.connectors.ai.open_ai import ( AzureOpenAISettings, AzureTextCompletion, @@ -43,11 +43,6 @@ ) # Tests are optional for ONNX bedrock_setup = is_service_setup_for_testing(["AWS_DEFAULT_REGION"], raise_if_not_set=False) -skip_on_mac_available = platform.system() == "Darwin" -if not skip_on_mac_available: - from semantic_kernel.connectors.ai.onnx import OnnxGenAIPromptExecutionSettings, OnnxGenAITextCompletion - - pytestmark = pytest.mark.parametrize( "service_id, execution_settings_kwargs, inputs, kwargs", [ @@ -128,7 +123,10 @@ {}, ["<|user|>Repeat the word Hello<|end|><|assistant|>"], {}, - marks=pytest.mark.skipif(not onnx_setup, reason="Need local Onnx setup"), + marks=( + pytest.mark.skipif(not onnx_setup, reason="Need a Onnx Model setup"), + pytest.mark.onnx, + ), id="onnx_gen_ai_text_completion", ), pytest.param( @@ -242,7 +240,7 @@ def services(self) -> dict[str, tuple[ServiceType | None, type[PromptExecutionSe ), "onnx_gen_ai": ( OnnxGenAITextCompletion() if onnx_setup else None, - OnnxGenAIPromptExecutionSettings if not skip_on_mac_available else None, + OnnxGenAIPromptExecutionSettings, ), # Amazon Bedrock supports models from multiple providers but requests to and responses from the models are # inconsistent. So we need to test each model separately. diff --git a/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py b/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py index 5454f4d52504..30c9573fef6c 100644 --- a/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py +++ b/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py @@ -1,185 +1,186 @@ # Copyright (c) Microsoft. All rights reserved. 
import json import os -import platform from unittest.mock import MagicMock, mock_open, patch import pytest +from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings, ONNXTemplate from semantic_kernel.contents import AuthorRole, ChatHistory, ChatMessageContent, ImageContent from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidExecutionSettingsError from semantic_kernel.kernel import Kernel from tests.unit.connectors.ai.onnx.conftest import gen_ai_config, gen_ai_config_vision -skip_on_mac_available = platform.system() == "Darwin" -if not skip_on_mac_available: - from semantic_kernel.connectors.ai.onnx import ( # noqa: E402 - OnnxGenAIChatCompletion, - OnnxGenAIPromptExecutionSettings, - ) - from semantic_kernel.connectors.ai.onnx.utils import ONNXTemplate # noqa: E402 - - -@pytest.mark.skipif(skip_on_mac_available, reason="OnnxRuntime is not available on macOS") -class TestOnnxChatCompletion: - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_valid_env_variable(self, gen_ai_config, model, tokenizer, onnx_unit_test_env): - service = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") - assert not service.enable_multi_modality - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config_vision)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_vision_valid_env_variable( - self, gen_ai_vision_config, model, tokenizer, onnx_unit_test_env - ): - service = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") - assert service.enable_multi_modality - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_valid_parameter(self, gen_ai_config, model, tokenizer): - assert OnnxGenAIChatCompletion(ai_model_path="/valid_path", template=ONNXTemplate.PHI3) - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_str_template(self, gen_ai_config, model, tokenizer): - assert OnnxGenAIChatCompletion(ai_model_path="/valid_path", template="phi3") - - def test_onnx_chat_completion_with_invalid_model(self): - with pytest.raises(ServiceInitializationError): - OnnxGenAIChatCompletion( - ai_model_path="/invalid_path", - template=ONNXTemplate.PHI3, - ) - - def test_onnx_chat_completion_without_prompt_template(self): - with pytest.raises(TypeError): - OnnxGenAIChatCompletion() - - def test_onnx_chat_completion_with_invalid_env_variable(self, onnx_unit_test_env): - with pytest.raises(ServiceInitializationError): - OnnxGenAIChatCompletion( - template=ONNXTemplate.PHI3, - ) - - @pytest.mark.parametrize("exclude_list", [["ONNX_GEN_AI_CHAT_MODEL_FOLDER"]], indirect=True) - def test_onnx_chat_completion_with_missing_ai_path(self, onnx_unit_test_env): - with pytest.raises(ServiceInitializationError): - OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - async def test_onnx_chat_completion(self, 
gen_ai_config, model, tokenizer): - generator_mock = MagicMock() - generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] - - chat_completion = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, ai_model_path="test") - history = ChatHistory() - history.add_system_message("test") - history.add_user_message("test") +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_valid_env_variable(gen_ai_config, model, tokenizer, onnx_unit_test_env): + service = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") + assert not service.enable_multi_modality + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config_vision)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_vision_valid_env_variable( + gen_ai_vision_config, model, tokenizer, onnx_unit_test_env +): + service = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") + assert service.enable_multi_modality + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_valid_parameter(gen_ai_config, model, tokenizer): + assert OnnxGenAIChatCompletion(ai_model_path="/valid_path", template=ONNXTemplate.PHI3) + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_str_template(gen_ai_config, model, tokenizer): + assert OnnxGenAIChatCompletion(ai_model_path="/valid_path", template="phi3") + + +def test_onnx_chat_completion_with_invalid_model(): + with pytest.raises(ServiceInitializationError): + OnnxGenAIChatCompletion( + ai_model_path="/invalid_path", + template=ONNXTemplate.PHI3, + ) + + +def test_onnx_chat_completion_without_prompt_template(): + with pytest.raises(TypeError): + OnnxGenAIChatCompletion() + + +def test_onnx_chat_completion_with_invalid_env_variable(onnx_unit_test_env): + with pytest.raises(ServiceInitializationError): + OnnxGenAIChatCompletion( + template=ONNXTemplate.PHI3, + ) + + +@pytest.mark.parametrize("exclude_list", [["ONNX_GEN_AI_CHAT_MODEL_FOLDER"]], indirect=True) +def test_onnx_chat_completion_with_missing_ai_path(onnx_unit_test_env): + with pytest.raises(ServiceInitializationError): + OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") - with patch.object(chat_completion, "_generate_next_token_async", return_value=generator_mock): - completed_text: ChatMessageContent = await chat_completion.get_chat_message_content( - prompt="test", chat_history=history, settings=OnnxGenAIPromptExecutionSettings(), kernel=Kernel() - ) - assert str(completed_text) == "Hello" +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +async def test_onnx_chat_completion(gen_ai_config, model, tokenizer): + generator_mock = MagicMock() + generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - async def test_onnx_chat_completion_streaming(self, gen_ai_config, 
model, tokenizer): - generator_mock = MagicMock() - generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] + chat_completion = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, ai_model_path="test") - chat_completion = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, ai_model_path="test") + history = ChatHistory() + history.add_system_message("test") + history.add_user_message("test") + + with patch.object(chat_completion, "_generate_next_token_async", return_value=generator_mock): + completed_text: ChatMessageContent = await chat_completion.get_chat_message_content( + prompt="test", chat_history=history, settings=OnnxGenAIPromptExecutionSettings(), kernel=Kernel() + ) + + assert str(completed_text) == "Hello" + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +async def test_onnx_chat_completion_streaming(gen_ai_config, model, tokenizer): + generator_mock = MagicMock() + generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] + + chat_completion = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, ai_model_path="test") + + history = ChatHistory() + history.add_system_message("test") + history.add_user_message("test") + + completed_text: str = "" + + with patch.object(chat_completion, "_generate_next_token_async", return_value=generator_mock): + async for chunk in chat_completion.get_streaming_chat_message_content( + prompt="test", chat_history=history, settings=OnnxGenAIPromptExecutionSettings(), kernel=Kernel() + ): + completed_text += str(chunk) + + assert completed_text == "Hello" + + +@patch("onnxruntime_genai.Model") +def test_onnx_chat_get_image_history(model): + builtin_open = open # save the unpatched version + + def patch_open(*args, **kwargs): + if "genai_config.json" in str(args[0]): + # mocked open for path "genai_config.json" + return mock_open(read_data=json.dumps(gen_ai_config_vision))(*args, **kwargs) + # unpatched version for every other path + return builtin_open(*args, **kwargs) + + with patch("builtins.open", patch_open): + chat_completion = OnnxGenAIChatCompletion( + template=ONNXTemplate.PHI3, + ai_model_path="test", + ) + + image_content = ImageContent.from_image_path( + image_path=os.path.join(os.path.dirname(__file__), "../../../../../", "assets/sample_image.jpg") + ) history = ChatHistory() history.add_system_message("test") history.add_user_message("test") + history.add_message( + ChatMessageContent( + role=AuthorRole.USER, + items=[image_content], + ), + ) + + last_image = chat_completion._get_images_from_history(history) + assert last_image == image_content + + +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +async def test_onnx_chat_get_image_history_with_not_multimodal(model, tokenizer): + builtin_open = open # save the unpatched version + + def patch_open(*args, **kwargs): + if "genai_config.json" in str(args[0]): + # mocked open for path "genai_config.json" + return mock_open(read_data=json.dumps(gen_ai_config))(*args, **kwargs) + # unpatched version for every other path + return builtin_open(*args, **kwargs) + + with patch("builtins.open", patch_open): + chat_completion = OnnxGenAIChatCompletion( + template=ONNXTemplate.PHI3, + ai_model_path="test", + ) + + image_content = ImageContent.from_image_path( + image_path=os.path.join(os.path.dirname(__file__), "../../../../../", "assets/sample_image.jpg") + ) - completed_text: str = "" - - with patch.object(chat_completion, 
"_generate_next_token_async", return_value=generator_mock): - async for chunk in chat_completion.get_streaming_chat_message_content( - prompt="test", chat_history=history, settings=OnnxGenAIPromptExecutionSettings(), kernel=Kernel() - ): - completed_text += str(chunk) - - assert completed_text == "Hello" - - @patch("onnxruntime_genai.Model") - def test_onnx_chat_get_image_history(self, model): - builtin_open = open # save the unpatched version - - def patch_open(*args, **kwargs): - if "genai_config.json" in str(args[0]): - # mocked open for path "genai_config.json" - return mock_open(read_data=json.dumps(gen_ai_config_vision))(*args, **kwargs) - # unpatched version for every other path - return builtin_open(*args, **kwargs) - - with patch("builtins.open", patch_open): - chat_completion = OnnxGenAIChatCompletion( - template=ONNXTemplate.PHI3, - ai_model_path="test", - ) - - image_content = ImageContent.from_image_path( - image_path=os.path.join(os.path.dirname(__file__), "../../../../../", "assets/sample_image.jpg") - ) - - history = ChatHistory() - history.add_system_message("test") - history.add_user_message("test") - history.add_message( - ChatMessageContent( - role=AuthorRole.USER, - items=[image_content], - ), - ) - - last_image = chat_completion._get_images_from_history(history) - assert last_image == image_content - - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - async def test_onnx_chat_get_image_history_with_not_multimodal(self, model, tokenizer): - builtin_open = open # save the unpatched version - - def patch_open(*args, **kwargs): - if "genai_config.json" in str(args[0]): - # mocked open for path "genai_config.json" - return mock_open(read_data=json.dumps(gen_ai_config))(*args, **kwargs) - # unpatched version for every other path - return builtin_open(*args, **kwargs) - - with patch("builtins.open", patch_open): - chat_completion = OnnxGenAIChatCompletion( - template=ONNXTemplate.PHI3, - ai_model_path="test", - ) - - image_content = ImageContent.from_image_path( - image_path=os.path.join(os.path.dirname(__file__), "../../../../../", "assets/sample_image.jpg") - ) - - history = ChatHistory() - history.add_system_message("test") - history.add_user_message("test") - history.add_message( - ChatMessageContent( - role=AuthorRole.USER, - items=[image_content], - ), - ) - - with pytest.raises(ServiceInvalidExecutionSettingsError): - _ = await chat_completion._get_images_from_history(history) + history = ChatHistory() + history.add_system_message("test") + history.add_user_message("test") + history.add_message( + ChatMessageContent( + role=AuthorRole.USER, + items=[image_content], + ), + ) + + with pytest.raises(ServiceInvalidExecutionSettingsError): + _ = await chat_completion._get_images_from_history(history) diff --git a/python/tests/unit/connectors/ai/onnx/services/test_onnx_text_completion.py b/python/tests/unit/connectors/ai/onnx/services/test_onnx_text_completion.py index 842c34f5cf3b..09435f02667f 100644 --- a/python/tests/unit/connectors/ai/onnx/services/test_onnx_text_completion.py +++ b/python/tests/unit/connectors/ai/onnx/services/test_onnx_text_completion.py @@ -1,77 +1,77 @@ # Copyright (c) Microsoft. All rights reserved. 
import json -import platform from unittest.mock import MagicMock, mock_open, patch import pytest +from semantic_kernel.connectors.ai.onnx import ( # noqa: E402 + OnnxGenAIPromptExecutionSettings, + OnnxGenAITextCompletion, +) from semantic_kernel.contents import TextContent from semantic_kernel.exceptions import ServiceInitializationError from tests.unit.connectors.ai.onnx.conftest import gen_ai_config -skip_on_mac_available = platform.system() == "Darwin" -if not skip_on_mac_available: - from semantic_kernel.connectors.ai.onnx import ( # noqa: E402 - OnnxGenAIPromptExecutionSettings, - OnnxGenAITextCompletion, - ) - - -@pytest.mark.skipif(skip_on_mac_available, reason="OnnxRuntime is not available on macOS") -class TestOnnxTextCompletion: - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_valid_env_variable(self, gen_ai_config, model, tokenizer, onnx_unit_test_env): - assert OnnxGenAITextCompletion(env_file_path="test.env") - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_valid_parameter(self, gen_ai_config, model, tokenizer): - assert OnnxGenAITextCompletion(ai_model_path="/valid_path") - - def test_onnx_chat_completion_with_invalid_model(self): - with pytest.raises(ServiceInitializationError): - OnnxGenAITextCompletion(ai_model_path="/invalid_path") - - def test_onnx_chat_completion_with_invalid_env_variable(self, onnx_unit_test_env): - with pytest.raises(ServiceInitializationError): - OnnxGenAITextCompletion() - - @pytest.mark.parametrize("exclude_list", [["ONNX_GEN_AI_TEXT_MODEL_FOLDER"]], indirect=True) - def test_onnx_chat_completion_with_missing_ai_path(self, onnx_unit_test_env): - with pytest.raises(ServiceInitializationError): - OnnxGenAITextCompletion(env_file_path="test.env") - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - async def test_onnx_text_completion(self, gen_ai_config, model, tokenizer): - generator_mock = MagicMock() - generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] - - text_completion = OnnxGenAITextCompletion(ai_model_path="test") - with patch.object(text_completion, "_generate_next_token_async", return_value=generator_mock): - completed_text: TextContent = await text_completion.get_text_content( - prompt="test", settings=OnnxGenAIPromptExecutionSettings() - ) - - assert completed_text.text == "Hello" - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - async def test_onnx_text_completion_streaming(self, gen_ai_config, model, tokenizer): - generator_mock = MagicMock() - generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] - - text_completion = OnnxGenAITextCompletion(ai_model_path="test") - completed_text: str = "" - with patch.object(text_completion, "_generate_next_token_async", return_value=generator_mock): - async for chunk in text_completion.get_streaming_text_content( - prompt="test", settings=OnnxGenAIPromptExecutionSettings() - ): - completed_text += chunk.text - - assert completed_text == "Hello" + +@patch("builtins.open", new_callable=mock_open, 
read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_valid_env_variable(gen_ai_config, model, tokenizer, onnx_unit_test_env): + assert OnnxGenAITextCompletion(env_file_path="test.env") + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_valid_parameter(gen_ai_config, model, tokenizer): + assert OnnxGenAITextCompletion(ai_model_path="/valid_path") + + +def test_onnx_chat_completion_with_invalid_model(): + with pytest.raises(ServiceInitializationError): + OnnxGenAITextCompletion(ai_model_path="/invalid_path") + + +def test_onnx_chat_completion_with_invalid_env_variable(onnx_unit_test_env): + with pytest.raises(ServiceInitializationError): + OnnxGenAITextCompletion() + + +@pytest.mark.parametrize("exclude_list", [["ONNX_GEN_AI_TEXT_MODEL_FOLDER"]], indirect=True) +def test_onnx_chat_completion_with_missing_ai_path(onnx_unit_test_env): + with pytest.raises(ServiceInitializationError): + OnnxGenAITextCompletion(env_file_path="test.env") + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +async def test_onnx_text_completion(gen_ai_config, model, tokenizer): + generator_mock = MagicMock() + generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] + + text_completion = OnnxGenAITextCompletion(ai_model_path="test") + with patch.object(text_completion, "_generate_next_token_async", return_value=generator_mock): + completed_text: TextContent = await text_completion.get_text_content( + prompt="test", settings=OnnxGenAIPromptExecutionSettings() + ) + + assert completed_text.text == "Hello" + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +async def test_onnx_text_completion_streaming(gen_ai_config, model, tokenizer): + generator_mock = MagicMock() + generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] + + text_completion = OnnxGenAITextCompletion(ai_model_path="test") + completed_text: str = "" + with patch.object(text_completion, "_generate_next_token_async", return_value=generator_mock): + async for chunk in text_completion.get_streaming_text_content( + prompt="test", settings=OnnxGenAIPromptExecutionSettings() + ): + completed_text += chunk.text + + assert completed_text == "Hello" diff --git a/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py b/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py index e746a1624a01..b4cb1e281826 100644 --- a/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py +++ b/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py @@ -1,97 +1,93 @@ # Copyright (c) Microsoft. All rights reserved. 
-import platform +from semantic_kernel.connectors.ai.onnx.utils import ( + gemma_template, + llama_template, + phi3_template, + phi3v_template, +) +from semantic_kernel.contents import AuthorRole, ChatHistory, ImageContent, TextContent -import pytest -from semantic_kernel.contents import AuthorRole, ChatHistory, ImageContent, TextContent +def test_phi3v_template_with_text_and_image(): + history = ChatHistory( + messages=[ + {"role": AuthorRole.SYSTEM, "content": "System message"}, + { + "role": AuthorRole.USER, + "items": [TextContent(text="User text message"), ImageContent(url="http://example.com/image.png")], + }, + {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, + ] + ) + + expected_output = ( + "<|system|>\nSystem message<|end|>\n" + "<|user|>\nUser text message<|end|>\n" + "<|image_1|>\n" + "<|assistant|>\nAssistant message<|end|>\n" + "<|assistant|>\n" + ) + + assert phi3v_template(history) == expected_output + -skip_on_mac_available = platform.system() == "Darwin" -if not skip_on_mac_available: - from semantic_kernel.connectors.ai.onnx.utils import ( # noqa: E402 - gemma_template, - llama_template, - phi3_template, - phi3v_template, +def test_phi3_template_with_only_text(): + history = ChatHistory(messages=[{"role": AuthorRole.USER, "items": [TextContent(text="User text message")]}]) + + expected_output = "<|user|>\nUser text message<|end|>\n<|assistant|>\n" + + assert phi3_template(history) == expected_output + + +def test_gemma_template_with_user_and_assistant_messages(): + history = ChatHistory( + messages=[ + {"role": AuthorRole.USER, "content": "User text message"}, + {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, + ] ) + expected_output = ( + "" + "user\nUser text message\n" + "model\nAssistant message\n" + "model\n" + ) + + assert gemma_template(history) == expected_output + + +def test_gemma_template_with_only_user_message(): + history = ChatHistory(messages=[{"role": AuthorRole.USER, "content": "User text message"}]) + + expected_output = "user\nUser text message\nmodel\n" + + assert gemma_template(history) == expected_output + + +def test_llama_template_with_user_and_assistant_messages(): + history = ChatHistory( + messages=[ + {"role": AuthorRole.USER, "content": "User text message"}, + {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, + ] + ) + + expected_output = ( + "<|start_header_id|>user<|end_header_id|>\n\nUser text message<|eot_id|>" + "<|start_header_id|>assistant<|end_header_id|>\n\nAssistant message<|eot_id|>" + "<|start_header_id|>assistant<|end_header_id|>" + ) + + assert llama_template(history) == expected_output + + +def test_llama_template_with_only_user_message(): + history = ChatHistory(messages=[{"role": AuthorRole.USER, "content": "User text message"}]) + + expected_output = ( + "<|start_header_id|>user<|end_header_id|>\n\nUser text message<|eot_id|>" + "<|start_header_id|>assistant<|end_header_id|>" + ) -@pytest.mark.skipif(skip_on_mac_available, reason="OnnxRuntime is not available on macOS") -class TestOnnxUtils: - def test_phi3v_template_with_text_and_image(self): - history = ChatHistory( - messages=[ - {"role": AuthorRole.SYSTEM, "content": "System message"}, - { - "role": AuthorRole.USER, - "items": [TextContent(text="User text message"), ImageContent(url="http://example.com/image.png")], - }, - {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, - ] - ) - - expected_output = ( - "<|system|>\nSystem message<|end|>\n" - "<|user|>\nUser text message<|end|>\n" - "<|image_1|>\n" - 
"<|assistant|>\nAssistant message<|end|>\n" - "<|assistant|>\n" - ) - - assert phi3v_template(history) == expected_output - - def test_phi3_template_with_only_text(self): - history = ChatHistory(messages=[{"role": AuthorRole.USER, "items": [TextContent(text="User text message")]}]) - - expected_output = "<|user|>\nUser text message<|end|>\n<|assistant|>\n" - - assert phi3_template(history) == expected_output - - def test_gemma_template_with_user_and_assistant_messages(self): - history = ChatHistory( - messages=[ - {"role": AuthorRole.USER, "content": "User text message"}, - {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, - ] - ) - - expected_output = ( - "" - "user\nUser text message\n" - "model\nAssistant message\n" - "model\n" - ) - - assert gemma_template(history) == expected_output - - def test_gemma_template_with_only_user_message(self): - history = ChatHistory(messages=[{"role": AuthorRole.USER, "content": "User text message"}]) - - expected_output = "user\nUser text message\nmodel\n" - - assert gemma_template(history) == expected_output - - def test_llama_template_with_user_and_assistant_messages(self): - history = ChatHistory( - messages=[ - {"role": AuthorRole.USER, "content": "User text message"}, - {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, - ] - ) - - expected_output = ( - "<|start_header_id|>user<|end_header_id|>\n\nUser text message<|eot_id|>" - "<|start_header_id|>assistant<|end_header_id|>\n\nAssistant message<|eot_id|>" - "<|start_header_id|>assistant<|end_header_id|>" - ) - - assert llama_template(history) == expected_output - - def test_llama_template_with_only_user_message(self): - history = ChatHistory(messages=[{"role": AuthorRole.USER, "content": "User text message"}]) - - expected_output = ( - "<|start_header_id|>user<|end_header_id|>\n\nUser text message<|eot_id|>" - "<|start_header_id|>assistant<|end_header_id|>" - ) - - assert llama_template(history) == expected_output + assert llama_template(history) == expected_output diff --git a/python/tests/unit/connectors/ai/onnx/test_onnx_prompt_execution_settings.py b/python/tests/unit/connectors/ai/onnx/test_onnx_prompt_execution_settings.py index 80c84619d0a2..3d6942ecb8a5 100644 --- a/python/tests/unit/connectors/ai/onnx/test_onnx_prompt_execution_settings.py +++ b/python/tests/unit/connectors/ai/onnx/test_onnx_prompt_execution_settings.py @@ -1,85 +1,84 @@ # Copyright (c) Microsoft. All rights reserved. 
-import platform - import pytest from pydantic import ValidationError +from semantic_kernel.connectors.ai.onnx.onnx_gen_ai_prompt_execution_settings import ( + OnnxGenAIPromptExecutionSettings, +) from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings -skip_on_mac_available = platform.system() == "Darwin" -if not skip_on_mac_available: - from semantic_kernel.connectors.ai.onnx.onnx_gen_ai_prompt_execution_settings import ( - OnnxGenAIPromptExecutionSettings, + +def test_default_onnx_chat_prompt_execution_settings(): + settings = OnnxGenAIPromptExecutionSettings() + assert settings.temperature is None + assert settings.top_p is None + + +def test_custom_onnx_chat_prompt_execution_settings(): + settings = OnnxGenAIPromptExecutionSettings( + temperature=0.5, + top_p=0.5, + max_length=128, ) + assert settings.temperature == 0.5 + assert settings.top_p == 0.5 + assert settings.max_length == 128 -@pytest.mark.skipif(skip_on_mac_available, reason="OnnxRuntime is not available on macOS") -class TestOnnxPromptExecutionSettings: - def test_default_onnx_chat_prompt_execution_settings(self): - settings = OnnxGenAIPromptExecutionSettings() - assert settings.temperature is None - assert settings.top_p is None +def test_onnx_chat_prompt_execution_settings_from_default_completion_config(): + settings = PromptExecutionSettings(service_id="test_service") + chat_settings = OnnxGenAIPromptExecutionSettings.from_prompt_execution_settings(settings) + assert chat_settings.service_id == "test_service" + assert chat_settings.temperature is None + assert chat_settings.top_p is None - def test_custom_onnx_chat_prompt_execution_settings(self): - settings = OnnxGenAIPromptExecutionSettings( - temperature=0.5, - top_p=0.5, - max_length=128, - ) - assert settings.temperature == 0.5 - assert settings.top_p == 0.5 - assert settings.max_length == 128 - def test_onnx_chat_prompt_execution_settings_from_default_completion_config(self): - settings = PromptExecutionSettings(service_id="test_service") - chat_settings = OnnxGenAIPromptExecutionSettings.from_prompt_execution_settings(settings) - assert chat_settings.service_id == "test_service" - assert chat_settings.temperature is None - assert chat_settings.top_p is None +def test_onnx_chat_prompt_execution_settings_from_onnx_prompt_execution_settings(): + chat_settings = OnnxGenAIPromptExecutionSettings(service_id="test_service", temperature=1.0) + new_settings = OnnxGenAIPromptExecutionSettings(service_id="test_2", temperature=0.0) + chat_settings.update_from_prompt_execution_settings(new_settings) + assert chat_settings.service_id == "test_2" + assert chat_settings.temperature == 0.0 - def test_onnx_chat_prompt_execution_settings_from_onnx_prompt_execution_settings(self): - chat_settings = OnnxGenAIPromptExecutionSettings(service_id="test_service", temperature=1.0) - new_settings = OnnxGenAIPromptExecutionSettings(service_id="test_2", temperature=0.0) - chat_settings.update_from_prompt_execution_settings(new_settings) - assert chat_settings.service_id == "test_2" - assert chat_settings.temperature == 0.0 - def test_onnx_chat_prompt_execution_settings_from_custom_completion_config(self): - settings = PromptExecutionSettings( - service_id="test_service", - extension_data={ - "temperature": 0.5, - "top_p": 0.5, - "max_length": 128, - }, - ) - chat_settings = OnnxGenAIPromptExecutionSettings.from_prompt_execution_settings(settings) - assert chat_settings.temperature == 0.5 - assert chat_settings.top_p == 0.5 - assert chat_settings.max_length 
== 128 +def test_onnx_chat_prompt_execution_settings_from_custom_completion_config(): + settings = PromptExecutionSettings( + service_id="test_service", + extension_data={ + "temperature": 0.5, + "top_p": 0.5, + "max_length": 128, + }, + ) + chat_settings = OnnxGenAIPromptExecutionSettings.from_prompt_execution_settings(settings) + assert chat_settings.temperature == 0.5 + assert chat_settings.top_p == 0.5 + assert chat_settings.max_length == 128 + - def test_create_options(self): - settings = OnnxGenAIPromptExecutionSettings( +def test_create_options(): + settings = OnnxGenAIPromptExecutionSettings( + service_id="test_service", + extension_data={ + "temperature": 0.5, + "top_p": 0.5, + "max_length": 128, + }, + ) + options = settings.prepare_settings_dict() + assert options["temperature"] == 0.5 + assert options["top_p"] == 0.5 + assert options["max_length"] == 128 + + +def test_create_options_with_wrong_parameter(): + with pytest.raises(ValidationError): + OnnxGenAIPromptExecutionSettings( service_id="test_service", + function_choice_behavior="auto", extension_data={ - "temperature": 0.5, + "temperature": 10.0, "top_p": 0.5, "max_length": 128, }, ) - options = settings.prepare_settings_dict() - assert options["temperature"] == 0.5 - assert options["top_p"] == 0.5 - assert options["max_length"] == 128 - - def test_create_options_with_wrong_parameter(self): - with pytest.raises(ValidationError): - OnnxGenAIPromptExecutionSettings( - service_id="test_service", - function_choice_behavior="auto", - extension_data={ - "temperature": 10.0, - "top_p": 0.5, - "max_length": 128, - }, - ) diff --git a/python/uv.lock b/python/uv.lock index 471f32675a40..e359cea177f1 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -2694,6 +2694,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/7f/7f/7fbae15a3982dc9595e49ce0f19332423b260045d0a6afe93cdbe2f1f624/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0f8aa1706812e00b9f19dfe0cdb3999b092ccb8ca168c0db5b8ea712456fd9b3", size = 363333771 }, { url = "https://files.pythonhosted.org/packages/ae/71/1c91302526c45ab494c23f61c7a84aa568b8c1f9d196efa5993957faf906/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2fc8da60df463fdefa81e323eef2e36489e1c94335b5358bcb38360adf75ac9b", size = 363438805 }, + { url = "https://files.pythonhosted.org/packages/e2/2a/4f27ca96232e8b5269074a72e03b4e0d43aa68c9b965058b1684d07c6ff8/nvidia_cublas_cu12-12.4.5.8-py3-none-win_amd64.whl", hash = "sha256:5a796786da89203a0657eda402bcdcec6180254a8ac22d72213abc42069522dc", size = 396895858 }, ] [[package]] @@ -2703,6 +2704,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/93/b5/9fb3d00386d3361b03874246190dfec7b206fd74e6e287b26a8fcb359d95/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:79279b35cf6f91da114182a5ce1864997fd52294a87a16179ce275773799458a", size = 12354556 }, { url = "https://files.pythonhosted.org/packages/67/42/f4f60238e8194a3106d06a058d494b18e006c10bb2b915655bd9f6ea4cb1/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9dec60f5ac126f7bb551c055072b69d85392b13311fcc1bcda2202d172df30fb", size = 13813957 }, + { url = "https://files.pythonhosted.org/packages/f3/79/8cf313ec17c58ccebc965568e5bcb265cdab0a1df99c4e674bb7a3b99bfe/nvidia_cuda_cupti_cu12-12.4.127-py3-none-win_amd64.whl", hash = 
"sha256:5688d203301ab051449a2b1cb6690fbe90d2b372f411521c86018b950f3d7922", size = 9938035 }, ] [[package]] @@ -2712,6 +2714,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/77/aa/083b01c427e963ad0b314040565ea396f914349914c298556484f799e61b/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0eedf14185e04b76aa05b1fea04133e59f465b6f960c0cbf4e37c3cb6b0ea198", size = 24133372 }, { url = "https://files.pythonhosted.org/packages/2c/14/91ae57cd4db3f9ef7aa99f4019cfa8d54cb4caa7e00975df6467e9725a9f/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a178759ebb095827bd30ef56598ec182b85547f1508941a3d560eb7ea1fbf338", size = 24640306 }, + { url = "https://files.pythonhosted.org/packages/7c/30/8c844bfb770f045bcd8b2c83455c5afb45983e1a8abf0c4e5297b481b6a5/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:a961b2f1d5f17b14867c619ceb99ef6fcec12e46612711bcec78eb05068a60ec", size = 19751955 }, ] [[package]] @@ -2721,6 +2724,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/a1/aa/b656d755f474e2084971e9a297def515938d56b466ab39624012070cb773/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:961fe0e2e716a2a1d967aab7caee97512f71767f852f67432d572e36cb3a11f3", size = 894177 }, { url = "https://files.pythonhosted.org/packages/ea/27/1795d86fe88ef397885f2e580ac37628ed058a92ed2c39dc8eac3adf0619/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:64403288fa2136ee8e467cdc9c9427e0434110899d07c779f25b5c068934faa5", size = 883737 }, + { url = "https://files.pythonhosted.org/packages/a8/8b/450e93fab75d85a69b50ea2d5fdd4ff44541e0138db16f9cd90123ef4de4/nvidia_cuda_runtime_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:09c2e35f48359752dfa822c09918211844a3d93c100a715d79b59591130c5e1e", size = 878808 }, ] [[package]] @@ -2732,6 +2736,7 @@ dependencies = [ ] wheels = [ { url = "https://files.pythonhosted.org/packages/9f/fd/713452cd72343f682b1c7b9321e23829f00b842ceaedcda96e742ea0b0b3/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f", size = 664752741 }, + { url = "https://files.pythonhosted.org/packages/3f/d0/f90ee6956a628f9f04bf467932c0a25e5a7e706a684b896593c06c82f460/nvidia_cudnn_cu12-9.1.0.70-py3-none-win_amd64.whl", hash = "sha256:6278562929433d68365a07a4a1546c237ba2849852c0d4b2262a486e805b977a", size = 679925892 }, ] [[package]] @@ -2744,6 +2749,7 @@ dependencies = [ wheels = [ { url = "https://files.pythonhosted.org/packages/7a/8a/0e728f749baca3fbeffad762738276e5df60851958be7783af121a7221e7/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5dad8008fc7f92f5ddfa2101430917ce2ffacd86824914c82e28990ad7f00399", size = 211422548 }, { url = "https://files.pythonhosted.org/packages/27/94/3266821f65b92b3138631e9c8e7fe1fb513804ac934485a8d05776e1dd43/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f083fc24912aa410be21fa16d157fed2055dab1cc4b6934a0e03cba69eb242b9", size = 211459117 }, + { url = "https://files.pythonhosted.org/packages/f6/ee/3f3f8e9874f0be5bbba8fb4b62b3de050156d159f8b6edc42d6f1074113b/nvidia_cufft_cu12-11.2.1.3-py3-none-win_amd64.whl", hash = "sha256:d802f4954291101186078ccbe22fc285a902136f974d369540fd4a5333d1440b", size = 210576476 }, ] [[package]] @@ -2753,6 +2759,7 @@ source = { registry 
= "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/80/9c/a79180e4d70995fdf030c6946991d0171555c6edf95c265c6b2bf7011112/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1f173f09e3e3c76ab084aba0de819c49e56614feae5c12f69883f4ae9bb5fad9", size = 56314811 }, { url = "https://files.pythonhosted.org/packages/8a/6d/44ad094874c6f1b9c654f8ed939590bdc408349f137f9b98a3a23ccec411/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a88f583d4e0bb643c49743469964103aa59f7f708d862c3ddb0fc07f851e3b8b", size = 56305206 }, + { url = "https://files.pythonhosted.org/packages/1c/22/2573503d0d4e45673c263a313f79410e110eb562636b0617856fdb2ff5f6/nvidia_curand_cu12-10.3.5.147-py3-none-win_amd64.whl", hash = "sha256:f307cc191f96efe9e8f05a87096abc20d08845a841889ef78cb06924437f6771", size = 55799918 }, ] [[package]] @@ -2767,6 +2774,7 @@ dependencies = [ wheels = [ { url = "https://files.pythonhosted.org/packages/46/6b/a5c33cf16af09166845345275c34ad2190944bcc6026797a39f8e0a282e0/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d338f155f174f90724bbde3758b7ac375a70ce8e706d70b018dd3375545fc84e", size = 127634111 }, { url = "https://files.pythonhosted.org/packages/3a/e1/5b9089a4b2a4790dfdea8b3a006052cfecff58139d5a4e34cb1a51df8d6f/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:19e33fa442bcfd085b3086c4ebf7e8debc07cfe01e11513cc6d332fd918ac260", size = 127936057 }, + { url = "https://files.pythonhosted.org/packages/f2/be/d435b7b020e854d5d5a682eb5de4328fd62f6182507406f2818280e206e2/nvidia_cusolver_cu12-11.6.1.9-py3-none-win_amd64.whl", hash = "sha256:e77314c9d7b694fcebc84f58989f3aa4fb4cb442f12ca1a9bde50f5e8f6d1b9c", size = 125224015 }, ] [[package]] @@ -2779,6 +2787,7 @@ dependencies = [ wheels = [ { url = "https://files.pythonhosted.org/packages/96/a9/c0d2f83a53d40a4a41be14cea6a0bf9e668ffcf8b004bd65633f433050c0/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_aarch64.whl", hash = "sha256:9d32f62896231ebe0480efd8a7f702e143c98cfaa0e8a76df3386c1ba2b54df3", size = 207381987 }, { url = "https://files.pythonhosted.org/packages/db/f7/97a9ea26ed4bbbfc2d470994b8b4f338ef663be97b8f677519ac195e113d/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ea4f11a2904e2a8dc4b1833cc1b5181cde564edd0d5cd33e3c168eff2d1863f1", size = 207454763 }, + { url = "https://files.pythonhosted.org/packages/a2/e0/3155ca539760a8118ec94cc279b34293309bcd14011fc724f87f31988843/nvidia_cusparse_cu12-12.3.1.170-py3-none-win_amd64.whl", hash = "sha256:9bc90fb087bc7b4c15641521f31c0371e9a612fc2ba12c338d3ae032e6b6797f", size = 204684315 }, ] [[package]] @@ -2796,6 +2805,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/02/45/239d52c05074898a80a900f49b1615d81c07fceadd5ad6c4f86a987c0bc4/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4abe7fef64914ccfa909bc2ba39739670ecc9e820c83ccc7a6ed414122599b83", size = 20552510 }, { url = "https://files.pythonhosted.org/packages/ff/ff/847841bacfbefc97a00036e0fce5a0f086b640756dc38caea5e1bb002655/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57", size = 21066810 }, + { url = "https://files.pythonhosted.org/packages/81/19/0babc919031bee42620257b9a911c528f05fb2688520dcd9ca59159ffea8/nvidia_nvjitlink_cu12-12.4.127-py3-none-win_amd64.whl", hash = 
"sha256:fd9020c501d27d135f983c6d3e244b197a7ccad769e34df53a42e276b0e25fa1", size = 95336325 }, ] [[package]] @@ -2805,6 +2815,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/06/39/471f581edbb7804b39e8063d92fc8305bdc7a80ae5c07dbe6ea5c50d14a5/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7959ad635db13edf4fc65c06a6e9f9e55fc2f92596db928d169c0bb031e88ef3", size = 100417 }, { url = "https://files.pythonhosted.org/packages/87/20/199b8713428322a2f22b722c62b8cc278cc53dffa9705d744484b5035ee9/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a", size = 99144 }, + { url = "https://files.pythonhosted.org/packages/54/1b/f77674fbb73af98843be25803bbd3b9a4f0a96c75b8d33a2854a5c7d2d77/nvidia_nvtx_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:641dccaaa1139f3ffb0d3164b4b84f9d253397e38246a4f2f36728b48566d485", size = 66307 }, ] [[package]] @@ -2874,11 +2885,17 @@ dependencies = [ { name = "onnxruntime", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/52/35/22a421f852eb14f47c33a4dd4c3ef58a2f3d5a96be8bb6d6cc271b2a0e83/onnxruntime_genai-0.5.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:cd322ead0027fbfa309e7be76c4512157ad369dc189ab3334a58a199b4f58a02", size = 769921 }, + { url = "https://files.pythonhosted.org/packages/7f/1b/5166ed4a73c5e9f92e6db4d7838923ffd595cea164661fae20d82e3a6966/onnxruntime_genai-0.5.2-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:79d721a33e80a9664aeeb87c0ceec75801fc81e48e8ff7940e3658d0b28f25cc", size = 869111 }, { url = "https://files.pythonhosted.org/packages/12/5b/6f08f9435f0c3977046cb4292ab1e836c22cd7d56fc87ace4d2a90dfb828/onnxruntime_genai-0.5.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bd7954f9dc829e69dabd7f676443529ac18171ec8077438c16364d381733070e", size = 1380370 }, { url = "https://files.pythonhosted.org/packages/57/d6/91e486424f924c2a99e8f1bd201180979101ecc09bee1ca7f53dae1c8a38/onnxruntime_genai-0.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:4d2968df6d8064664a5f095006c70520f4ca689204b695e88951f088477bc1e0", size = 776263 }, + { url = "https://files.pythonhosted.org/packages/3e/3d/e2d8f89c05c6cf35e2ade2b335b1b97725327591b8fb141d266ab98615f9/onnxruntime_genai-0.5.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:217c748f8ebd1a5082e1ad8ee8fc90fc1a4e9ce7839189f4c2c2545d1390af15", size = 769888 }, + { url = "https://files.pythonhosted.org/packages/33/13/66ffa143cc82f8352ec87ba0501bc21e05dd9e84fbbad530e74a705ac911/onnxruntime_genai-0.5.2-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:6194aabd589b3ffb571b325f504266ac47c33c434abfd87575c30d7a3e1179c9", size = 869092 }, { url = "https://files.pythonhosted.org/packages/6a/17/a29c0cf89d90374234b8e510fcb970f2e043b42689b5ea23cbdab5a414b6/onnxruntime_genai-0.5.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:88edb36c9e2d670316f1e6e4ce27a86f212648a92053a94a31f88b1f4d6c0935", size = 1380461 }, { url = "https://files.pythonhosted.org/packages/59/b1/acb1daf1a08c8098c828e7ea9e187b9728a8fc151a4df4911f988c08a874/onnxruntime_genai-0.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:09b697f955616156948f21366d13d02884a15521926f68a259722d9fa4437db4", size = 776308 }, { url = 
"https://files.pythonhosted.org/packages/22/57/d249827c3e37abe528674bfa97de4c61b18afb452d2afced690a745e0866/onnxruntime_genai-0.5.2-cp311-cp311-win_arm64.whl", hash = "sha256:893be15d2113438e60b8a1c0095892e0fd4f2b01dd470d6197337db2a5778c88", size = 751552 }, + { url = "https://files.pythonhosted.org/packages/cf/72/259de19e93e72b14d0a3910f1025f71da006a8dfc76c97792646b335a8a3/onnxruntime_genai-0.5.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:6b438d7f4901081b8f3ff99db6c6ea15a3fcc107abce79859ff635e1278e26b0", size = 771097 }, + { url = "https://files.pythonhosted.org/packages/8c/72/73c95e357ada258025236437fb2b4d56fb7e8594db6361f4560ea97ca06c/onnxruntime_genai-0.5.2-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:d7bffb799d44656b2615fc43130a1a287d57e8893b80523e560924cf05770f1d", size = 871450 }, { url = "https://files.pythonhosted.org/packages/79/3d/43211c8a66d7ce54dea137ad7bec30767e3f2dc5e1e22befdcca290ebbe0/onnxruntime_genai-0.5.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bb5b9650512e21a71d965e582d616b33df07978b0c3ecbd5bef0912a7b5f7832", size = 1380898 }, { url = "https://files.pythonhosted.org/packages/9f/7b/53b217ed0db401877fafa2f63d2ce7de754899f2bdf4cb415931e2019f18/onnxruntime_genai-0.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:422e9af27f182247378e9423f5745becfaffcdf7a4f452da17fd5d9390770ca7", size = 776974 }, { url = "https://files.pythonhosted.org/packages/08/c1/a69aeba29f40febd8d70d45044d4eb97905beb37fc8491b1628c8714ecc1/onnxruntime_genai-0.5.2-cp312-cp312-win_arm64.whl", hash = "sha256:315b23cb04749202c9cc3eb34f281bb4943de477a5aa46c99b940603b6a5d272", size = 751246 }, @@ -3559,6 +3576,8 @@ version = "6.1.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/26/10/2a30b13c61e7cf937f4adf90710776b7918ed0a9c434e2c38224732af310/psutil-6.1.0.tar.gz", hash = "sha256:353815f59a7f64cdaca1c0307ee13558a0512f6db064e92fe833784f08539c7a", size = 508565 } wheels = [ + { url = "https://files.pythonhosted.org/packages/da/2b/f4dea5d993d9cd22ad958eea828a41d5d225556123d372f02547c29c4f97/psutil-6.1.0-cp27-none-win32.whl", hash = "sha256:9118f27452b70bb1d9ab3198c1f626c2499384935aaf55388211ad982611407e", size = 246648 }, + { url = "https://files.pythonhosted.org/packages/9f/14/4aa97a7f2e0ac33a050d990ab31686d651ae4ef8c86661fef067f00437b9/psutil-6.1.0-cp27-none-win_amd64.whl", hash = "sha256:a8506f6119cff7015678e2bce904a4da21025cc70ad283a53b099e7620061d85", size = 249905 }, { url = "https://files.pythonhosted.org/packages/01/9e/8be43078a171381953cfee33c07c0d628594b5dbfc5157847b85022c2c1b/psutil-6.1.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6e2dcd475ce8b80522e51d923d10c7871e45f20918e027ab682f94f1c6351688", size = 247762 }, { url = "https://files.pythonhosted.org/packages/1d/cb/313e80644ea407f04f6602a9e23096540d9dc1878755f3952ea8d3d104be/psutil-6.1.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:0895b8414afafc526712c498bd9de2b063deaac4021a3b3c34566283464aff8e", size = 248777 }, { url = "https://files.pythonhosted.org/packages/65/8e/bcbe2025c587b5d703369b6a75b65d41d1367553da6e3f788aff91eaf5bd/psutil-6.1.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9dcbfce5d89f1d1f2546a2090f4fcf87c7f669d1d90aacb7d7582addece9fb38", size = 284259 }, @@ -4803,7 +4822,7 @@ ollama = [ { name = "ollama", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, ] onnx = [ - { name = "onnxruntime-genai", marker = 
"(platform_system != 'Darwin' and sys_platform == 'darwin') or (platform_system != 'Darwin' and sys_platform == 'linux') or (platform_system != 'Darwin' and sys_platform == 'win32')" }, + { name = "onnxruntime-genai", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, ] pandas = [ { name = "pandas", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, @@ -4874,7 +4893,7 @@ requires-dist = [ { name = "numpy", marker = "python_full_version < '3.12'", specifier = ">=1.25.0" }, { name = "numpy", marker = "python_full_version >= '3.12'", specifier = ">=1.26.0" }, { name = "ollama", marker = "extra == 'ollama'", specifier = "~=0.4" }, - { name = "onnxruntime-genai", marker = "platform_system != 'Darwin' and extra == 'onnx'", specifier = "~=0.4" }, + { name = "onnxruntime-genai", marker = "extra == 'onnx'", specifier = "~=0.5" }, { name = "openai", specifier = "~=1.0" }, { name = "openapi-core", specifier = ">=0.18,<0.20" }, { name = "opentelemetry-api", specifier = "~=1.24" },