diff --git a/python/pyproject.toml b/python/pyproject.toml
index 3dd580cdf470..bf30da7ef96e 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -86,7 +86,7 @@ ollama = [
     "ollama ~= 0.4"
 ]
 onnx = [
-    "onnxruntime-genai ~= 0.4; platform_system != 'Darwin'"
+    "onnxruntime-genai ~= 0.5"
 ]
 anthropic = [
     "anthropic ~= 0.32"
@@ -156,7 +156,8 @@ filterwarnings = [
 ]
 timeout = 120
 markers = [
-    "ollama: mark a test as requiring the Ollama service (use \"not ollama\" to skip those tests)"
+    "ollama: mark a test as requiring the Ollama service (use \"not ollama\" to skip those tests)",
+    "onnx: mark a test as requiring the Onnx service (use \"not onnx\" to skip those tests)"
 ]
 
 [tool.ruff]
diff --git a/python/tests/integration/completions/chat_completion_test_base.py b/python/tests/integration/completions/chat_completion_test_base.py
index d05157e607c5..7a4db5b8fcfe 100644
--- a/python/tests/integration/completions/chat_completion_test_base.py
+++ b/python/tests/integration/completions/chat_completion_test_base.py
@@ -2,7 +2,6 @@
 
 
 import os
-import platform
 import sys
 from typing import Annotated
 
@@ -22,6 +21,7 @@
 from semantic_kernel.connectors.ai.google.vertex_ai import VertexAIChatCompletion, VertexAIChatPromptExecutionSettings
 from semantic_kernel.connectors.ai.mistral_ai import MistralAIChatCompletion, MistralAIChatPromptExecutionSettings
 from semantic_kernel.connectors.ai.ollama import OllamaChatCompletion, OllamaChatPromptExecutionSettings
+from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings, ONNXTemplate
 from semantic_kernel.connectors.ai.open_ai import (
     AzureChatCompletion,
     AzureChatPromptExecutionSettings,
@@ -71,12 +71,6 @@
 
 bedrock_setup: bool = is_service_setup_for_testing(["AWS_DEFAULT_REGION"], raise_if_not_set=False)
 
-skip_on_mac_available = platform.system() == "Darwin"
-if not skip_on_mac_available:
-    from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings
-    from semantic_kernel.connectors.ai.onnx.utils import ONNXTemplate
-
-
 # A mock plugin that contains a function that returns a complex object.
 class PersonDetails(KernelBaseModel):
     id: str
@@ -155,7 +149,7 @@ def services(self) -> dict[str, tuple[ServiceType | None, type[PromptExecutionSe
             "vertex_ai": (VertexAIChatCompletion() if vertex_ai_setup else None, VertexAIChatPromptExecutionSettings),
             "onnx_gen_ai": (
                 OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3V) if onnx_setup else None,
-                OnnxGenAIPromptExecutionSettings if not skip_on_mac_available else None,
+                OnnxGenAIPromptExecutionSettings,
             ),
             "bedrock_amazon_titan": (
                 BedrockChatCompletion(model_id="amazon.titan-text-premier-v1:0") if bedrock_setup else None,
diff --git a/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py b/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py
index f45f3367268c..2eb52e22f1e5 100644
--- a/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py
+++ b/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py
@@ -118,7 +118,10 @@
                 ChatMessageContent(role=AuthorRole.USER, items=[TextContent(text="Where was it made?")]),
             ],
             {},
-            marks=pytest.mark.skipif(not onnx_setup, reason="Need a Onnx Model setup"),
+            marks=(
+                pytest.mark.skipif(not onnx_setup, reason="Need a Onnx Model setup"),
+                pytest.mark.onnx,
+            ),
             id="onnx_gen_ai_image_input_file",
         ),
         pytest.param(
diff --git a/python/tests/integration/completions/test_chat_completions.py b/python/tests/integration/completions/test_chat_completions.py
index e3a77542f0f6..67eab08b0a90 100644
--- a/python/tests/integration/completions/test_chat_completions.py
+++ b/python/tests/integration/completions/test_chat_completions.py
@@ -150,7 +150,10 @@ class Reasoning(KernelBaseModel):
                 ChatMessageContent(role=AuthorRole.USER, items=[TextContent(text="How are you today?")]),
             ],
             {},
-            marks=pytest.mark.skipif(not onnx_setup, reason="Need a Onnx Model setup"),
+            marks=(
+                pytest.mark.skipif(not onnx_setup, reason="Need a Onnx Model setup"),
+                pytest.mark.onnx,
+            ),
             id="onnx_gen_ai",
         ),
         # endregion
diff --git a/python/tests/integration/completions/test_text_completion.py b/python/tests/integration/completions/test_text_completion.py
index 3e9b34ef76aa..7e6792de4465 100644
--- a/python/tests/integration/completions/test_text_completion.py
+++ b/python/tests/integration/completions/test_text_completion.py
@@ -1,6 +1,5 @@
 # Copyright (c) Microsoft. All rights reserved.
 
-import platform
 import sys
 from functools import partial
 from typing import Any
@@ -19,6 +18,7 @@
 from semantic_kernel.connectors.ai.google.vertex_ai import VertexAITextCompletion, VertexAITextPromptExecutionSettings
 from semantic_kernel.connectors.ai.hugging_face import HuggingFacePromptExecutionSettings, HuggingFaceTextCompletion
 from semantic_kernel.connectors.ai.ollama import OllamaTextCompletion, OllamaTextPromptExecutionSettings
+from semantic_kernel.connectors.ai.onnx import OnnxGenAIPromptExecutionSettings, OnnxGenAITextCompletion
 from semantic_kernel.connectors.ai.open_ai import (
     AzureOpenAISettings,
     AzureTextCompletion,
@@ -43,11 +43,6 @@
 )  # Tests are optional for ONNX
 bedrock_setup = is_service_setup_for_testing(["AWS_DEFAULT_REGION"], raise_if_not_set=False)
 
-skip_on_mac_available = platform.system() == "Darwin"
-if not skip_on_mac_available:
-    from semantic_kernel.connectors.ai.onnx import OnnxGenAIPromptExecutionSettings, OnnxGenAITextCompletion
-
-
 pytestmark = pytest.mark.parametrize(
     "service_id, execution_settings_kwargs, inputs, kwargs",
     [
@@ -128,7 +123,10 @@
             {},
             ["<|user|>Repeat the word Hello<|end|><|assistant|>"],
             {},
-            marks=pytest.mark.skipif(not onnx_setup, reason="Need local Onnx setup"),
+            marks=(
+                pytest.mark.skipif(not onnx_setup, reason="Need a Onnx Model setup"),
+                pytest.mark.onnx,
+            ),
             id="onnx_gen_ai_text_completion",
         ),
         pytest.param(
@@ -242,7 +240,7 @@ def services(self) -> dict[str, tuple[ServiceType | None, type[PromptExecutionSe
             ),
             "onnx_gen_ai": (
                 OnnxGenAITextCompletion() if onnx_setup else None,
-                OnnxGenAIPromptExecutionSettings if not skip_on_mac_available else None,
+                OnnxGenAIPromptExecutionSettings,
             ),
             # Amazon Bedrock supports models from multiple providers but requests to and responses from the models are
             # inconsistent. So we need to test each model separately.
diff --git a/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py b/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py
index 5454f4d52504..30c9573fef6c 100644
--- a/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py
+++ b/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py
@@ -1,185 +1,186 @@
 # Copyright (c) Microsoft. All rights reserved.
import json import os -import platform from unittest.mock import MagicMock, mock_open, patch import pytest +from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings, ONNXTemplate from semantic_kernel.contents import AuthorRole, ChatHistory, ChatMessageContent, ImageContent from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidExecutionSettingsError from semantic_kernel.kernel import Kernel from tests.unit.connectors.ai.onnx.conftest import gen_ai_config, gen_ai_config_vision -skip_on_mac_available = platform.system() == "Darwin" -if not skip_on_mac_available: - from semantic_kernel.connectors.ai.onnx import ( # noqa: E402 - OnnxGenAIChatCompletion, - OnnxGenAIPromptExecutionSettings, - ) - from semantic_kernel.connectors.ai.onnx.utils import ONNXTemplate # noqa: E402 - - -@pytest.mark.skipif(skip_on_mac_available, reason="OnnxRuntime is not available on macOS") -class TestOnnxChatCompletion: - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_valid_env_variable(self, gen_ai_config, model, tokenizer, onnx_unit_test_env): - service = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") - assert not service.enable_multi_modality - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config_vision)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_vision_valid_env_variable( - self, gen_ai_vision_config, model, tokenizer, onnx_unit_test_env - ): - service = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") - assert service.enable_multi_modality - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_valid_parameter(self, gen_ai_config, model, tokenizer): - assert OnnxGenAIChatCompletion(ai_model_path="/valid_path", template=ONNXTemplate.PHI3) - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_str_template(self, gen_ai_config, model, tokenizer): - assert OnnxGenAIChatCompletion(ai_model_path="/valid_path", template="phi3") - - def test_onnx_chat_completion_with_invalid_model(self): - with pytest.raises(ServiceInitializationError): - OnnxGenAIChatCompletion( - ai_model_path="/invalid_path", - template=ONNXTemplate.PHI3, - ) - - def test_onnx_chat_completion_without_prompt_template(self): - with pytest.raises(TypeError): - OnnxGenAIChatCompletion() - - def test_onnx_chat_completion_with_invalid_env_variable(self, onnx_unit_test_env): - with pytest.raises(ServiceInitializationError): - OnnxGenAIChatCompletion( - template=ONNXTemplate.PHI3, - ) - - @pytest.mark.parametrize("exclude_list", [["ONNX_GEN_AI_CHAT_MODEL_FOLDER"]], indirect=True) - def test_onnx_chat_completion_with_missing_ai_path(self, onnx_unit_test_env): - with pytest.raises(ServiceInitializationError): - OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - async def test_onnx_chat_completion(self, 
gen_ai_config, model, tokenizer): - generator_mock = MagicMock() - generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] - - chat_completion = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, ai_model_path="test") - history = ChatHistory() - history.add_system_message("test") - history.add_user_message("test") +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_valid_env_variable(gen_ai_config, model, tokenizer, onnx_unit_test_env): + service = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") + assert not service.enable_multi_modality + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config_vision)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_vision_valid_env_variable( + gen_ai_vision_config, model, tokenizer, onnx_unit_test_env +): + service = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") + assert service.enable_multi_modality + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_valid_parameter(gen_ai_config, model, tokenizer): + assert OnnxGenAIChatCompletion(ai_model_path="/valid_path", template=ONNXTemplate.PHI3) + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_str_template(gen_ai_config, model, tokenizer): + assert OnnxGenAIChatCompletion(ai_model_path="/valid_path", template="phi3") + + +def test_onnx_chat_completion_with_invalid_model(): + with pytest.raises(ServiceInitializationError): + OnnxGenAIChatCompletion( + ai_model_path="/invalid_path", + template=ONNXTemplate.PHI3, + ) + + +def test_onnx_chat_completion_without_prompt_template(): + with pytest.raises(TypeError): + OnnxGenAIChatCompletion() + + +def test_onnx_chat_completion_with_invalid_env_variable(onnx_unit_test_env): + with pytest.raises(ServiceInitializationError): + OnnxGenAIChatCompletion( + template=ONNXTemplate.PHI3, + ) + + +@pytest.mark.parametrize("exclude_list", [["ONNX_GEN_AI_CHAT_MODEL_FOLDER"]], indirect=True) +def test_onnx_chat_completion_with_missing_ai_path(onnx_unit_test_env): + with pytest.raises(ServiceInitializationError): + OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, env_file_path="test.env") - with patch.object(chat_completion, "_generate_next_token_async", return_value=generator_mock): - completed_text: ChatMessageContent = await chat_completion.get_chat_message_content( - prompt="test", chat_history=history, settings=OnnxGenAIPromptExecutionSettings(), kernel=Kernel() - ) - assert str(completed_text) == "Hello" +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +async def test_onnx_chat_completion(gen_ai_config, model, tokenizer): + generator_mock = MagicMock() + generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - async def test_onnx_chat_completion_streaming(self, gen_ai_config, 
model, tokenizer): - generator_mock = MagicMock() - generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] + chat_completion = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, ai_model_path="test") - chat_completion = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, ai_model_path="test") + history = ChatHistory() + history.add_system_message("test") + history.add_user_message("test") + + with patch.object(chat_completion, "_generate_next_token_async", return_value=generator_mock): + completed_text: ChatMessageContent = await chat_completion.get_chat_message_content( + prompt="test", chat_history=history, settings=OnnxGenAIPromptExecutionSettings(), kernel=Kernel() + ) + + assert str(completed_text) == "Hello" + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +async def test_onnx_chat_completion_streaming(gen_ai_config, model, tokenizer): + generator_mock = MagicMock() + generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] + + chat_completion = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI3, ai_model_path="test") + + history = ChatHistory() + history.add_system_message("test") + history.add_user_message("test") + + completed_text: str = "" + + with patch.object(chat_completion, "_generate_next_token_async", return_value=generator_mock): + async for chunk in chat_completion.get_streaming_chat_message_content( + prompt="test", chat_history=history, settings=OnnxGenAIPromptExecutionSettings(), kernel=Kernel() + ): + completed_text += str(chunk) + + assert completed_text == "Hello" + + +@patch("onnxruntime_genai.Model") +def test_onnx_chat_get_image_history(model): + builtin_open = open # save the unpatched version + + def patch_open(*args, **kwargs): + if "genai_config.json" in str(args[0]): + # mocked open for path "genai_config.json" + return mock_open(read_data=json.dumps(gen_ai_config_vision))(*args, **kwargs) + # unpatched version for every other path + return builtin_open(*args, **kwargs) + + with patch("builtins.open", patch_open): + chat_completion = OnnxGenAIChatCompletion( + template=ONNXTemplate.PHI3, + ai_model_path="test", + ) + + image_content = ImageContent.from_image_path( + image_path=os.path.join(os.path.dirname(__file__), "../../../../../", "assets/sample_image.jpg") + ) history = ChatHistory() history.add_system_message("test") history.add_user_message("test") + history.add_message( + ChatMessageContent( + role=AuthorRole.USER, + items=[image_content], + ), + ) + + last_image = chat_completion._get_images_from_history(history) + assert last_image == image_content + + +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +async def test_onnx_chat_get_image_history_with_not_multimodal(model, tokenizer): + builtin_open = open # save the unpatched version + + def patch_open(*args, **kwargs): + if "genai_config.json" in str(args[0]): + # mocked open for path "genai_config.json" + return mock_open(read_data=json.dumps(gen_ai_config))(*args, **kwargs) + # unpatched version for every other path + return builtin_open(*args, **kwargs) + + with patch("builtins.open", patch_open): + chat_completion = OnnxGenAIChatCompletion( + template=ONNXTemplate.PHI3, + ai_model_path="test", + ) + + image_content = ImageContent.from_image_path( + image_path=os.path.join(os.path.dirname(__file__), "../../../../../", "assets/sample_image.jpg") + ) - completed_text: str = "" - - with patch.object(chat_completion, 
"_generate_next_token_async", return_value=generator_mock): - async for chunk in chat_completion.get_streaming_chat_message_content( - prompt="test", chat_history=history, settings=OnnxGenAIPromptExecutionSettings(), kernel=Kernel() - ): - completed_text += str(chunk) - - assert completed_text == "Hello" - - @patch("onnxruntime_genai.Model") - def test_onnx_chat_get_image_history(self, model): - builtin_open = open # save the unpatched version - - def patch_open(*args, **kwargs): - if "genai_config.json" in str(args[0]): - # mocked open for path "genai_config.json" - return mock_open(read_data=json.dumps(gen_ai_config_vision))(*args, **kwargs) - # unpatched version for every other path - return builtin_open(*args, **kwargs) - - with patch("builtins.open", patch_open): - chat_completion = OnnxGenAIChatCompletion( - template=ONNXTemplate.PHI3, - ai_model_path="test", - ) - - image_content = ImageContent.from_image_path( - image_path=os.path.join(os.path.dirname(__file__), "../../../../../", "assets/sample_image.jpg") - ) - - history = ChatHistory() - history.add_system_message("test") - history.add_user_message("test") - history.add_message( - ChatMessageContent( - role=AuthorRole.USER, - items=[image_content], - ), - ) - - last_image = chat_completion._get_images_from_history(history) - assert last_image == image_content - - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - async def test_onnx_chat_get_image_history_with_not_multimodal(self, model, tokenizer): - builtin_open = open # save the unpatched version - - def patch_open(*args, **kwargs): - if "genai_config.json" in str(args[0]): - # mocked open for path "genai_config.json" - return mock_open(read_data=json.dumps(gen_ai_config))(*args, **kwargs) - # unpatched version for every other path - return builtin_open(*args, **kwargs) - - with patch("builtins.open", patch_open): - chat_completion = OnnxGenAIChatCompletion( - template=ONNXTemplate.PHI3, - ai_model_path="test", - ) - - image_content = ImageContent.from_image_path( - image_path=os.path.join(os.path.dirname(__file__), "../../../../../", "assets/sample_image.jpg") - ) - - history = ChatHistory() - history.add_system_message("test") - history.add_user_message("test") - history.add_message( - ChatMessageContent( - role=AuthorRole.USER, - items=[image_content], - ), - ) - - with pytest.raises(ServiceInvalidExecutionSettingsError): - _ = await chat_completion._get_images_from_history(history) + history = ChatHistory() + history.add_system_message("test") + history.add_user_message("test") + history.add_message( + ChatMessageContent( + role=AuthorRole.USER, + items=[image_content], + ), + ) + + with pytest.raises(ServiceInvalidExecutionSettingsError): + _ = await chat_completion._get_images_from_history(history) diff --git a/python/tests/unit/connectors/ai/onnx/services/test_onnx_text_completion.py b/python/tests/unit/connectors/ai/onnx/services/test_onnx_text_completion.py index 842c34f5cf3b..09435f02667f 100644 --- a/python/tests/unit/connectors/ai/onnx/services/test_onnx_text_completion.py +++ b/python/tests/unit/connectors/ai/onnx/services/test_onnx_text_completion.py @@ -1,77 +1,77 @@ # Copyright (c) Microsoft. All rights reserved. 
import json -import platform from unittest.mock import MagicMock, mock_open, patch import pytest +from semantic_kernel.connectors.ai.onnx import ( # noqa: E402 + OnnxGenAIPromptExecutionSettings, + OnnxGenAITextCompletion, +) from semantic_kernel.contents import TextContent from semantic_kernel.exceptions import ServiceInitializationError from tests.unit.connectors.ai.onnx.conftest import gen_ai_config -skip_on_mac_available = platform.system() == "Darwin" -if not skip_on_mac_available: - from semantic_kernel.connectors.ai.onnx import ( # noqa: E402 - OnnxGenAIPromptExecutionSettings, - OnnxGenAITextCompletion, - ) - - -@pytest.mark.skipif(skip_on_mac_available, reason="OnnxRuntime is not available on macOS") -class TestOnnxTextCompletion: - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_valid_env_variable(self, gen_ai_config, model, tokenizer, onnx_unit_test_env): - assert OnnxGenAITextCompletion(env_file_path="test.env") - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - def test_onnx_chat_completion_with_valid_parameter(self, gen_ai_config, model, tokenizer): - assert OnnxGenAITextCompletion(ai_model_path="/valid_path") - - def test_onnx_chat_completion_with_invalid_model(self): - with pytest.raises(ServiceInitializationError): - OnnxGenAITextCompletion(ai_model_path="/invalid_path") - - def test_onnx_chat_completion_with_invalid_env_variable(self, onnx_unit_test_env): - with pytest.raises(ServiceInitializationError): - OnnxGenAITextCompletion() - - @pytest.mark.parametrize("exclude_list", [["ONNX_GEN_AI_TEXT_MODEL_FOLDER"]], indirect=True) - def test_onnx_chat_completion_with_missing_ai_path(self, onnx_unit_test_env): - with pytest.raises(ServiceInitializationError): - OnnxGenAITextCompletion(env_file_path="test.env") - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - async def test_onnx_text_completion(self, gen_ai_config, model, tokenizer): - generator_mock = MagicMock() - generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] - - text_completion = OnnxGenAITextCompletion(ai_model_path="test") - with patch.object(text_completion, "_generate_next_token_async", return_value=generator_mock): - completed_text: TextContent = await text_completion.get_text_content( - prompt="test", settings=OnnxGenAIPromptExecutionSettings() - ) - - assert completed_text.text == "Hello" - - @patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) - @patch("onnxruntime_genai.Model") - @patch("onnxruntime_genai.Tokenizer") - async def test_onnx_text_completion_streaming(self, gen_ai_config, model, tokenizer): - generator_mock = MagicMock() - generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] - - text_completion = OnnxGenAITextCompletion(ai_model_path="test") - completed_text: str = "" - with patch.object(text_completion, "_generate_next_token_async", return_value=generator_mock): - async for chunk in text_completion.get_streaming_text_content( - prompt="test", settings=OnnxGenAIPromptExecutionSettings() - ): - completed_text += chunk.text - - assert completed_text == "Hello" + +@patch("builtins.open", new_callable=mock_open, 
read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_valid_env_variable(gen_ai_config, model, tokenizer, onnx_unit_test_env): + assert OnnxGenAITextCompletion(env_file_path="test.env") + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +def test_onnx_chat_completion_with_valid_parameter(gen_ai_config, model, tokenizer): + assert OnnxGenAITextCompletion(ai_model_path="/valid_path") + + +def test_onnx_chat_completion_with_invalid_model(): + with pytest.raises(ServiceInitializationError): + OnnxGenAITextCompletion(ai_model_path="/invalid_path") + + +def test_onnx_chat_completion_with_invalid_env_variable(onnx_unit_test_env): + with pytest.raises(ServiceInitializationError): + OnnxGenAITextCompletion() + + +@pytest.mark.parametrize("exclude_list", [["ONNX_GEN_AI_TEXT_MODEL_FOLDER"]], indirect=True) +def test_onnx_chat_completion_with_missing_ai_path(onnx_unit_test_env): + with pytest.raises(ServiceInitializationError): + OnnxGenAITextCompletion(env_file_path="test.env") + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +async def test_onnx_text_completion(gen_ai_config, model, tokenizer): + generator_mock = MagicMock() + generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] + + text_completion = OnnxGenAITextCompletion(ai_model_path="test") + with patch.object(text_completion, "_generate_next_token_async", return_value=generator_mock): + completed_text: TextContent = await text_completion.get_text_content( + prompt="test", settings=OnnxGenAIPromptExecutionSettings() + ) + + assert completed_text.text == "Hello" + + +@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config)) +@patch("onnxruntime_genai.Model") +@patch("onnxruntime_genai.Tokenizer") +async def test_onnx_text_completion_streaming(gen_ai_config, model, tokenizer): + generator_mock = MagicMock() + generator_mock.__aiter__.return_value = [["H"], ["e"], ["l"], ["l"], ["o"]] + + text_completion = OnnxGenAITextCompletion(ai_model_path="test") + completed_text: str = "" + with patch.object(text_completion, "_generate_next_token_async", return_value=generator_mock): + async for chunk in text_completion.get_streaming_text_content( + prompt="test", settings=OnnxGenAIPromptExecutionSettings() + ): + completed_text += chunk.text + + assert completed_text == "Hello" diff --git a/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py b/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py index e746a1624a01..b4cb1e281826 100644 --- a/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py +++ b/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py @@ -1,97 +1,93 @@ # Copyright (c) Microsoft. All rights reserved. 
-import platform +from semantic_kernel.connectors.ai.onnx.utils import ( + gemma_template, + llama_template, + phi3_template, + phi3v_template, +) +from semantic_kernel.contents import AuthorRole, ChatHistory, ImageContent, TextContent -import pytest -from semantic_kernel.contents import AuthorRole, ChatHistory, ImageContent, TextContent +def test_phi3v_template_with_text_and_image(): + history = ChatHistory( + messages=[ + {"role": AuthorRole.SYSTEM, "content": "System message"}, + { + "role": AuthorRole.USER, + "items": [TextContent(text="User text message"), ImageContent(url="http://example.com/image.png")], + }, + {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, + ] + ) + + expected_output = ( + "<|system|>\nSystem message<|end|>\n" + "<|user|>\nUser text message<|end|>\n" + "<|image_1|>\n" + "<|assistant|>\nAssistant message<|end|>\n" + "<|assistant|>\n" + ) + + assert phi3v_template(history) == expected_output + -skip_on_mac_available = platform.system() == "Darwin" -if not skip_on_mac_available: - from semantic_kernel.connectors.ai.onnx.utils import ( # noqa: E402 - gemma_template, - llama_template, - phi3_template, - phi3v_template, +def test_phi3_template_with_only_text(): + history = ChatHistory(messages=[{"role": AuthorRole.USER, "items": [TextContent(text="User text message")]}]) + + expected_output = "<|user|>\nUser text message<|end|>\n<|assistant|>\n" + + assert phi3_template(history) == expected_output + + +def test_gemma_template_with_user_and_assistant_messages(): + history = ChatHistory( + messages=[ + {"role": AuthorRole.USER, "content": "User text message"}, + {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, + ] ) + expected_output = ( + "" + "user\nUser text message\n" + "model\nAssistant message\n" + "model\n" + ) + + assert gemma_template(history) == expected_output + + +def test_gemma_template_with_only_user_message(): + history = ChatHistory(messages=[{"role": AuthorRole.USER, "content": "User text message"}]) + + expected_output = "user\nUser text message\nmodel\n" + + assert gemma_template(history) == expected_output + + +def test_llama_template_with_user_and_assistant_messages(): + history = ChatHistory( + messages=[ + {"role": AuthorRole.USER, "content": "User text message"}, + {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, + ] + ) + + expected_output = ( + "<|start_header_id|>user<|end_header_id|>\n\nUser text message<|eot_id|>" + "<|start_header_id|>assistant<|end_header_id|>\n\nAssistant message<|eot_id|>" + "<|start_header_id|>assistant<|end_header_id|>" + ) + + assert llama_template(history) == expected_output + + +def test_llama_template_with_only_user_message(): + history = ChatHistory(messages=[{"role": AuthorRole.USER, "content": "User text message"}]) + + expected_output = ( + "<|start_header_id|>user<|end_header_id|>\n\nUser text message<|eot_id|>" + "<|start_header_id|>assistant<|end_header_id|>" + ) -@pytest.mark.skipif(skip_on_mac_available, reason="OnnxRuntime is not available on macOS") -class TestOnnxUtils: - def test_phi3v_template_with_text_and_image(self): - history = ChatHistory( - messages=[ - {"role": AuthorRole.SYSTEM, "content": "System message"}, - { - "role": AuthorRole.USER, - "items": [TextContent(text="User text message"), ImageContent(url="http://example.com/image.png")], - }, - {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, - ] - ) - - expected_output = ( - "<|system|>\nSystem message<|end|>\n" - "<|user|>\nUser text message<|end|>\n" - "<|image_1|>\n" - 
"<|assistant|>\nAssistant message<|end|>\n" - "<|assistant|>\n" - ) - - assert phi3v_template(history) == expected_output - - def test_phi3_template_with_only_text(self): - history = ChatHistory(messages=[{"role": AuthorRole.USER, "items": [TextContent(text="User text message")]}]) - - expected_output = "<|user|>\nUser text message<|end|>\n<|assistant|>\n" - - assert phi3_template(history) == expected_output - - def test_gemma_template_with_user_and_assistant_messages(self): - history = ChatHistory( - messages=[ - {"role": AuthorRole.USER, "content": "User text message"}, - {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, - ] - ) - - expected_output = ( - "" - "user\nUser text message\n" - "model\nAssistant message\n" - "model\n" - ) - - assert gemma_template(history) == expected_output - - def test_gemma_template_with_only_user_message(self): - history = ChatHistory(messages=[{"role": AuthorRole.USER, "content": "User text message"}]) - - expected_output = "user\nUser text message\nmodel\n" - - assert gemma_template(history) == expected_output - - def test_llama_template_with_user_and_assistant_messages(self): - history = ChatHistory( - messages=[ - {"role": AuthorRole.USER, "content": "User text message"}, - {"role": AuthorRole.ASSISTANT, "content": "Assistant message"}, - ] - ) - - expected_output = ( - "<|start_header_id|>user<|end_header_id|>\n\nUser text message<|eot_id|>" - "<|start_header_id|>assistant<|end_header_id|>\n\nAssistant message<|eot_id|>" - "<|start_header_id|>assistant<|end_header_id|>" - ) - - assert llama_template(history) == expected_output - - def test_llama_template_with_only_user_message(self): - history = ChatHistory(messages=[{"role": AuthorRole.USER, "content": "User text message"}]) - - expected_output = ( - "<|start_header_id|>user<|end_header_id|>\n\nUser text message<|eot_id|>" - "<|start_header_id|>assistant<|end_header_id|>" - ) - - assert llama_template(history) == expected_output + assert llama_template(history) == expected_output diff --git a/python/tests/unit/connectors/ai/onnx/test_onnx_prompt_execution_settings.py b/python/tests/unit/connectors/ai/onnx/test_onnx_prompt_execution_settings.py index 80c84619d0a2..3d6942ecb8a5 100644 --- a/python/tests/unit/connectors/ai/onnx/test_onnx_prompt_execution_settings.py +++ b/python/tests/unit/connectors/ai/onnx/test_onnx_prompt_execution_settings.py @@ -1,85 +1,84 @@ # Copyright (c) Microsoft. All rights reserved. 
-import platform - import pytest from pydantic import ValidationError +from semantic_kernel.connectors.ai.onnx.onnx_gen_ai_prompt_execution_settings import ( + OnnxGenAIPromptExecutionSettings, +) from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings -skip_on_mac_available = platform.system() == "Darwin" -if not skip_on_mac_available: - from semantic_kernel.connectors.ai.onnx.onnx_gen_ai_prompt_execution_settings import ( - OnnxGenAIPromptExecutionSettings, + +def test_default_onnx_chat_prompt_execution_settings(): + settings = OnnxGenAIPromptExecutionSettings() + assert settings.temperature is None + assert settings.top_p is None + + +def test_custom_onnx_chat_prompt_execution_settings(): + settings = OnnxGenAIPromptExecutionSettings( + temperature=0.5, + top_p=0.5, + max_length=128, ) + assert settings.temperature == 0.5 + assert settings.top_p == 0.5 + assert settings.max_length == 128 -@pytest.mark.skipif(skip_on_mac_available, reason="OnnxRuntime is not available on macOS") -class TestOnnxPromptExecutionSettings: - def test_default_onnx_chat_prompt_execution_settings(self): - settings = OnnxGenAIPromptExecutionSettings() - assert settings.temperature is None - assert settings.top_p is None +def test_onnx_chat_prompt_execution_settings_from_default_completion_config(): + settings = PromptExecutionSettings(service_id="test_service") + chat_settings = OnnxGenAIPromptExecutionSettings.from_prompt_execution_settings(settings) + assert chat_settings.service_id == "test_service" + assert chat_settings.temperature is None + assert chat_settings.top_p is None - def test_custom_onnx_chat_prompt_execution_settings(self): - settings = OnnxGenAIPromptExecutionSettings( - temperature=0.5, - top_p=0.5, - max_length=128, - ) - assert settings.temperature == 0.5 - assert settings.top_p == 0.5 - assert settings.max_length == 128 - def test_onnx_chat_prompt_execution_settings_from_default_completion_config(self): - settings = PromptExecutionSettings(service_id="test_service") - chat_settings = OnnxGenAIPromptExecutionSettings.from_prompt_execution_settings(settings) - assert chat_settings.service_id == "test_service" - assert chat_settings.temperature is None - assert chat_settings.top_p is None +def test_onnx_chat_prompt_execution_settings_from_onnx_prompt_execution_settings(): + chat_settings = OnnxGenAIPromptExecutionSettings(service_id="test_service", temperature=1.0) + new_settings = OnnxGenAIPromptExecutionSettings(service_id="test_2", temperature=0.0) + chat_settings.update_from_prompt_execution_settings(new_settings) + assert chat_settings.service_id == "test_2" + assert chat_settings.temperature == 0.0 - def test_onnx_chat_prompt_execution_settings_from_onnx_prompt_execution_settings(self): - chat_settings = OnnxGenAIPromptExecutionSettings(service_id="test_service", temperature=1.0) - new_settings = OnnxGenAIPromptExecutionSettings(service_id="test_2", temperature=0.0) - chat_settings.update_from_prompt_execution_settings(new_settings) - assert chat_settings.service_id == "test_2" - assert chat_settings.temperature == 0.0 - def test_onnx_chat_prompt_execution_settings_from_custom_completion_config(self): - settings = PromptExecutionSettings( - service_id="test_service", - extension_data={ - "temperature": 0.5, - "top_p": 0.5, - "max_length": 128, - }, - ) - chat_settings = OnnxGenAIPromptExecutionSettings.from_prompt_execution_settings(settings) - assert chat_settings.temperature == 0.5 - assert chat_settings.top_p == 0.5 - assert chat_settings.max_length 
== 128 +def test_onnx_chat_prompt_execution_settings_from_custom_completion_config(): + settings = PromptExecutionSettings( + service_id="test_service", + extension_data={ + "temperature": 0.5, + "top_p": 0.5, + "max_length": 128, + }, + ) + chat_settings = OnnxGenAIPromptExecutionSettings.from_prompt_execution_settings(settings) + assert chat_settings.temperature == 0.5 + assert chat_settings.top_p == 0.5 + assert chat_settings.max_length == 128 + - def test_create_options(self): - settings = OnnxGenAIPromptExecutionSettings( +def test_create_options(): + settings = OnnxGenAIPromptExecutionSettings( + service_id="test_service", + extension_data={ + "temperature": 0.5, + "top_p": 0.5, + "max_length": 128, + }, + ) + options = settings.prepare_settings_dict() + assert options["temperature"] == 0.5 + assert options["top_p"] == 0.5 + assert options["max_length"] == 128 + + +def test_create_options_with_wrong_parameter(): + with pytest.raises(ValidationError): + OnnxGenAIPromptExecutionSettings( service_id="test_service", + function_choice_behavior="auto", extension_data={ - "temperature": 0.5, + "temperature": 10.0, "top_p": 0.5, "max_length": 128, }, ) - options = settings.prepare_settings_dict() - assert options["temperature"] == 0.5 - assert options["top_p"] == 0.5 - assert options["max_length"] == 128 - - def test_create_options_with_wrong_parameter(self): - with pytest.raises(ValidationError): - OnnxGenAIPromptExecutionSettings( - service_id="test_service", - function_choice_behavior="auto", - extension_data={ - "temperature": 10.0, - "top_p": 0.5, - "max_length": 128, - }, - ) diff --git a/python/uv.lock b/python/uv.lock index 471f32675a40..e359cea177f1 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -2694,6 +2694,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/7f/7f/7fbae15a3982dc9595e49ce0f19332423b260045d0a6afe93cdbe2f1f624/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0f8aa1706812e00b9f19dfe0cdb3999b092ccb8ca168c0db5b8ea712456fd9b3", size = 363333771 }, { url = "https://files.pythonhosted.org/packages/ae/71/1c91302526c45ab494c23f61c7a84aa568b8c1f9d196efa5993957faf906/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2fc8da60df463fdefa81e323eef2e36489e1c94335b5358bcb38360adf75ac9b", size = 363438805 }, + { url = "https://files.pythonhosted.org/packages/e2/2a/4f27ca96232e8b5269074a72e03b4e0d43aa68c9b965058b1684d07c6ff8/nvidia_cublas_cu12-12.4.5.8-py3-none-win_amd64.whl", hash = "sha256:5a796786da89203a0657eda402bcdcec6180254a8ac22d72213abc42069522dc", size = 396895858 }, ] [[package]] @@ -2703,6 +2704,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/93/b5/9fb3d00386d3361b03874246190dfec7b206fd74e6e287b26a8fcb359d95/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:79279b35cf6f91da114182a5ce1864997fd52294a87a16179ce275773799458a", size = 12354556 }, { url = "https://files.pythonhosted.org/packages/67/42/f4f60238e8194a3106d06a058d494b18e006c10bb2b915655bd9f6ea4cb1/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9dec60f5ac126f7bb551c055072b69d85392b13311fcc1bcda2202d172df30fb", size = 13813957 }, + { url = "https://files.pythonhosted.org/packages/f3/79/8cf313ec17c58ccebc965568e5bcb265cdab0a1df99c4e674bb7a3b99bfe/nvidia_cuda_cupti_cu12-12.4.127-py3-none-win_amd64.whl", hash = 
"sha256:5688d203301ab051449a2b1cb6690fbe90d2b372f411521c86018b950f3d7922", size = 9938035 }, ] [[package]] @@ -2712,6 +2714,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/77/aa/083b01c427e963ad0b314040565ea396f914349914c298556484f799e61b/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0eedf14185e04b76aa05b1fea04133e59f465b6f960c0cbf4e37c3cb6b0ea198", size = 24133372 }, { url = "https://files.pythonhosted.org/packages/2c/14/91ae57cd4db3f9ef7aa99f4019cfa8d54cb4caa7e00975df6467e9725a9f/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a178759ebb095827bd30ef56598ec182b85547f1508941a3d560eb7ea1fbf338", size = 24640306 }, + { url = "https://files.pythonhosted.org/packages/7c/30/8c844bfb770f045bcd8b2c83455c5afb45983e1a8abf0c4e5297b481b6a5/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:a961b2f1d5f17b14867c619ceb99ef6fcec12e46612711bcec78eb05068a60ec", size = 19751955 }, ] [[package]] @@ -2721,6 +2724,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/a1/aa/b656d755f474e2084971e9a297def515938d56b466ab39624012070cb773/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:961fe0e2e716a2a1d967aab7caee97512f71767f852f67432d572e36cb3a11f3", size = 894177 }, { url = "https://files.pythonhosted.org/packages/ea/27/1795d86fe88ef397885f2e580ac37628ed058a92ed2c39dc8eac3adf0619/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:64403288fa2136ee8e467cdc9c9427e0434110899d07c779f25b5c068934faa5", size = 883737 }, + { url = "https://files.pythonhosted.org/packages/a8/8b/450e93fab75d85a69b50ea2d5fdd4ff44541e0138db16f9cd90123ef4de4/nvidia_cuda_runtime_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:09c2e35f48359752dfa822c09918211844a3d93c100a715d79b59591130c5e1e", size = 878808 }, ] [[package]] @@ -2732,6 +2736,7 @@ dependencies = [ ] wheels = [ { url = "https://files.pythonhosted.org/packages/9f/fd/713452cd72343f682b1c7b9321e23829f00b842ceaedcda96e742ea0b0b3/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f", size = 664752741 }, + { url = "https://files.pythonhosted.org/packages/3f/d0/f90ee6956a628f9f04bf467932c0a25e5a7e706a684b896593c06c82f460/nvidia_cudnn_cu12-9.1.0.70-py3-none-win_amd64.whl", hash = "sha256:6278562929433d68365a07a4a1546c237ba2849852c0d4b2262a486e805b977a", size = 679925892 }, ] [[package]] @@ -2744,6 +2749,7 @@ dependencies = [ wheels = [ { url = "https://files.pythonhosted.org/packages/7a/8a/0e728f749baca3fbeffad762738276e5df60851958be7783af121a7221e7/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5dad8008fc7f92f5ddfa2101430917ce2ffacd86824914c82e28990ad7f00399", size = 211422548 }, { url = "https://files.pythonhosted.org/packages/27/94/3266821f65b92b3138631e9c8e7fe1fb513804ac934485a8d05776e1dd43/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f083fc24912aa410be21fa16d157fed2055dab1cc4b6934a0e03cba69eb242b9", size = 211459117 }, + { url = "https://files.pythonhosted.org/packages/f6/ee/3f3f8e9874f0be5bbba8fb4b62b3de050156d159f8b6edc42d6f1074113b/nvidia_cufft_cu12-11.2.1.3-py3-none-win_amd64.whl", hash = "sha256:d802f4954291101186078ccbe22fc285a902136f974d369540fd4a5333d1440b", size = 210576476 }, ] [[package]] @@ -2753,6 +2759,7 @@ source = { registry 
= "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/80/9c/a79180e4d70995fdf030c6946991d0171555c6edf95c265c6b2bf7011112/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1f173f09e3e3c76ab084aba0de819c49e56614feae5c12f69883f4ae9bb5fad9", size = 56314811 }, { url = "https://files.pythonhosted.org/packages/8a/6d/44ad094874c6f1b9c654f8ed939590bdc408349f137f9b98a3a23ccec411/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a88f583d4e0bb643c49743469964103aa59f7f708d862c3ddb0fc07f851e3b8b", size = 56305206 }, + { url = "https://files.pythonhosted.org/packages/1c/22/2573503d0d4e45673c263a313f79410e110eb562636b0617856fdb2ff5f6/nvidia_curand_cu12-10.3.5.147-py3-none-win_amd64.whl", hash = "sha256:f307cc191f96efe9e8f05a87096abc20d08845a841889ef78cb06924437f6771", size = 55799918 }, ] [[package]] @@ -2767,6 +2774,7 @@ dependencies = [ wheels = [ { url = "https://files.pythonhosted.org/packages/46/6b/a5c33cf16af09166845345275c34ad2190944bcc6026797a39f8e0a282e0/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d338f155f174f90724bbde3758b7ac375a70ce8e706d70b018dd3375545fc84e", size = 127634111 }, { url = "https://files.pythonhosted.org/packages/3a/e1/5b9089a4b2a4790dfdea8b3a006052cfecff58139d5a4e34cb1a51df8d6f/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:19e33fa442bcfd085b3086c4ebf7e8debc07cfe01e11513cc6d332fd918ac260", size = 127936057 }, + { url = "https://files.pythonhosted.org/packages/f2/be/d435b7b020e854d5d5a682eb5de4328fd62f6182507406f2818280e206e2/nvidia_cusolver_cu12-11.6.1.9-py3-none-win_amd64.whl", hash = "sha256:e77314c9d7b694fcebc84f58989f3aa4fb4cb442f12ca1a9bde50f5e8f6d1b9c", size = 125224015 }, ] [[package]] @@ -2779,6 +2787,7 @@ dependencies = [ wheels = [ { url = "https://files.pythonhosted.org/packages/96/a9/c0d2f83a53d40a4a41be14cea6a0bf9e668ffcf8b004bd65633f433050c0/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_aarch64.whl", hash = "sha256:9d32f62896231ebe0480efd8a7f702e143c98cfaa0e8a76df3386c1ba2b54df3", size = 207381987 }, { url = "https://files.pythonhosted.org/packages/db/f7/97a9ea26ed4bbbfc2d470994b8b4f338ef663be97b8f677519ac195e113d/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ea4f11a2904e2a8dc4b1833cc1b5181cde564edd0d5cd33e3c168eff2d1863f1", size = 207454763 }, + { url = "https://files.pythonhosted.org/packages/a2/e0/3155ca539760a8118ec94cc279b34293309bcd14011fc724f87f31988843/nvidia_cusparse_cu12-12.3.1.170-py3-none-win_amd64.whl", hash = "sha256:9bc90fb087bc7b4c15641521f31c0371e9a612fc2ba12c338d3ae032e6b6797f", size = 204684315 }, ] [[package]] @@ -2796,6 +2805,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/02/45/239d52c05074898a80a900f49b1615d81c07fceadd5ad6c4f86a987c0bc4/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4abe7fef64914ccfa909bc2ba39739670ecc9e820c83ccc7a6ed414122599b83", size = 20552510 }, { url = "https://files.pythonhosted.org/packages/ff/ff/847841bacfbefc97a00036e0fce5a0f086b640756dc38caea5e1bb002655/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57", size = 21066810 }, + { url = "https://files.pythonhosted.org/packages/81/19/0babc919031bee42620257b9a911c528f05fb2688520dcd9ca59159ffea8/nvidia_nvjitlink_cu12-12.4.127-py3-none-win_amd64.whl", hash = 
"sha256:fd9020c501d27d135f983c6d3e244b197a7ccad769e34df53a42e276b0e25fa1", size = 95336325 }, ] [[package]] @@ -2805,6 +2815,7 @@ source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/06/39/471f581edbb7804b39e8063d92fc8305bdc7a80ae5c07dbe6ea5c50d14a5/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7959ad635db13edf4fc65c06a6e9f9e55fc2f92596db928d169c0bb031e88ef3", size = 100417 }, { url = "https://files.pythonhosted.org/packages/87/20/199b8713428322a2f22b722c62b8cc278cc53dffa9705d744484b5035ee9/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a", size = 99144 }, + { url = "https://files.pythonhosted.org/packages/54/1b/f77674fbb73af98843be25803bbd3b9a4f0a96c75b8d33a2854a5c7d2d77/nvidia_nvtx_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:641dccaaa1139f3ffb0d3164b4b84f9d253397e38246a4f2f36728b48566d485", size = 66307 }, ] [[package]] @@ -2874,11 +2885,17 @@ dependencies = [ { name = "onnxruntime", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/52/35/22a421f852eb14f47c33a4dd4c3ef58a2f3d5a96be8bb6d6cc271b2a0e83/onnxruntime_genai-0.5.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:cd322ead0027fbfa309e7be76c4512157ad369dc189ab3334a58a199b4f58a02", size = 769921 }, + { url = "https://files.pythonhosted.org/packages/7f/1b/5166ed4a73c5e9f92e6db4d7838923ffd595cea164661fae20d82e3a6966/onnxruntime_genai-0.5.2-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:79d721a33e80a9664aeeb87c0ceec75801fc81e48e8ff7940e3658d0b28f25cc", size = 869111 }, { url = "https://files.pythonhosted.org/packages/12/5b/6f08f9435f0c3977046cb4292ab1e836c22cd7d56fc87ace4d2a90dfb828/onnxruntime_genai-0.5.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bd7954f9dc829e69dabd7f676443529ac18171ec8077438c16364d381733070e", size = 1380370 }, { url = "https://files.pythonhosted.org/packages/57/d6/91e486424f924c2a99e8f1bd201180979101ecc09bee1ca7f53dae1c8a38/onnxruntime_genai-0.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:4d2968df6d8064664a5f095006c70520f4ca689204b695e88951f088477bc1e0", size = 776263 }, + { url = "https://files.pythonhosted.org/packages/3e/3d/e2d8f89c05c6cf35e2ade2b335b1b97725327591b8fb141d266ab98615f9/onnxruntime_genai-0.5.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:217c748f8ebd1a5082e1ad8ee8fc90fc1a4e9ce7839189f4c2c2545d1390af15", size = 769888 }, + { url = "https://files.pythonhosted.org/packages/33/13/66ffa143cc82f8352ec87ba0501bc21e05dd9e84fbbad530e74a705ac911/onnxruntime_genai-0.5.2-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:6194aabd589b3ffb571b325f504266ac47c33c434abfd87575c30d7a3e1179c9", size = 869092 }, { url = "https://files.pythonhosted.org/packages/6a/17/a29c0cf89d90374234b8e510fcb970f2e043b42689b5ea23cbdab5a414b6/onnxruntime_genai-0.5.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:88edb36c9e2d670316f1e6e4ce27a86f212648a92053a94a31f88b1f4d6c0935", size = 1380461 }, { url = "https://files.pythonhosted.org/packages/59/b1/acb1daf1a08c8098c828e7ea9e187b9728a8fc151a4df4911f988c08a874/onnxruntime_genai-0.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:09b697f955616156948f21366d13d02884a15521926f68a259722d9fa4437db4", size = 776308 }, { url = 
"https://files.pythonhosted.org/packages/22/57/d249827c3e37abe528674bfa97de4c61b18afb452d2afced690a745e0866/onnxruntime_genai-0.5.2-cp311-cp311-win_arm64.whl", hash = "sha256:893be15d2113438e60b8a1c0095892e0fd4f2b01dd470d6197337db2a5778c88", size = 751552 }, + { url = "https://files.pythonhosted.org/packages/cf/72/259de19e93e72b14d0a3910f1025f71da006a8dfc76c97792646b335a8a3/onnxruntime_genai-0.5.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:6b438d7f4901081b8f3ff99db6c6ea15a3fcc107abce79859ff635e1278e26b0", size = 771097 }, + { url = "https://files.pythonhosted.org/packages/8c/72/73c95e357ada258025236437fb2b4d56fb7e8594db6361f4560ea97ca06c/onnxruntime_genai-0.5.2-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:d7bffb799d44656b2615fc43130a1a287d57e8893b80523e560924cf05770f1d", size = 871450 }, { url = "https://files.pythonhosted.org/packages/79/3d/43211c8a66d7ce54dea137ad7bec30767e3f2dc5e1e22befdcca290ebbe0/onnxruntime_genai-0.5.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bb5b9650512e21a71d965e582d616b33df07978b0c3ecbd5bef0912a7b5f7832", size = 1380898 }, { url = "https://files.pythonhosted.org/packages/9f/7b/53b217ed0db401877fafa2f63d2ce7de754899f2bdf4cb415931e2019f18/onnxruntime_genai-0.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:422e9af27f182247378e9423f5745becfaffcdf7a4f452da17fd5d9390770ca7", size = 776974 }, { url = "https://files.pythonhosted.org/packages/08/c1/a69aeba29f40febd8d70d45044d4eb97905beb37fc8491b1628c8714ecc1/onnxruntime_genai-0.5.2-cp312-cp312-win_arm64.whl", hash = "sha256:315b23cb04749202c9cc3eb34f281bb4943de477a5aa46c99b940603b6a5d272", size = 751246 }, @@ -3559,6 +3576,8 @@ version = "6.1.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/26/10/2a30b13c61e7cf937f4adf90710776b7918ed0a9c434e2c38224732af310/psutil-6.1.0.tar.gz", hash = "sha256:353815f59a7f64cdaca1c0307ee13558a0512f6db064e92fe833784f08539c7a", size = 508565 } wheels = [ + { url = "https://files.pythonhosted.org/packages/da/2b/f4dea5d993d9cd22ad958eea828a41d5d225556123d372f02547c29c4f97/psutil-6.1.0-cp27-none-win32.whl", hash = "sha256:9118f27452b70bb1d9ab3198c1f626c2499384935aaf55388211ad982611407e", size = 246648 }, + { url = "https://files.pythonhosted.org/packages/9f/14/4aa97a7f2e0ac33a050d990ab31686d651ae4ef8c86661fef067f00437b9/psutil-6.1.0-cp27-none-win_amd64.whl", hash = "sha256:a8506f6119cff7015678e2bce904a4da21025cc70ad283a53b099e7620061d85", size = 249905 }, { url = "https://files.pythonhosted.org/packages/01/9e/8be43078a171381953cfee33c07c0d628594b5dbfc5157847b85022c2c1b/psutil-6.1.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6e2dcd475ce8b80522e51d923d10c7871e45f20918e027ab682f94f1c6351688", size = 247762 }, { url = "https://files.pythonhosted.org/packages/1d/cb/313e80644ea407f04f6602a9e23096540d9dc1878755f3952ea8d3d104be/psutil-6.1.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:0895b8414afafc526712c498bd9de2b063deaac4021a3b3c34566283464aff8e", size = 248777 }, { url = "https://files.pythonhosted.org/packages/65/8e/bcbe2025c587b5d703369b6a75b65d41d1367553da6e3f788aff91eaf5bd/psutil-6.1.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9dcbfce5d89f1d1f2546a2090f4fcf87c7f669d1d90aacb7d7582addece9fb38", size = 284259 }, @@ -4803,7 +4822,7 @@ ollama = [ { name = "ollama", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, ] onnx = [ - { name = "onnxruntime-genai", marker = 
"(platform_system != 'Darwin' and sys_platform == 'darwin') or (platform_system != 'Darwin' and sys_platform == 'linux') or (platform_system != 'Darwin' and sys_platform == 'win32')" }, + { name = "onnxruntime-genai", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, ] pandas = [ { name = "pandas", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, @@ -4874,7 +4893,7 @@ requires-dist = [ { name = "numpy", marker = "python_full_version < '3.12'", specifier = ">=1.25.0" }, { name = "numpy", marker = "python_full_version >= '3.12'", specifier = ">=1.26.0" }, { name = "ollama", marker = "extra == 'ollama'", specifier = "~=0.4" }, - { name = "onnxruntime-genai", marker = "platform_system != 'Darwin' and extra == 'onnx'", specifier = "~=0.4" }, + { name = "onnxruntime-genai", marker = "extra == 'onnx'", specifier = "~=0.5" }, { name = "openai", specifier = "~=1.0" }, { name = "openapi-core", specifier = ">=0.18,<0.20" }, { name = "opentelemetry-api", specifier = "~=1.24" },