From 36701a2c7328eba05b7bfa1fb43ce1b57b7d04cb Mon Sep 17 00:00:00 2001
From: Evan Mattson <35585003+moonbox3@users.noreply.github.com>
Date: Thu, 19 Dec 2024 17:13:00 +0900
Subject: [PATCH] Python: Include a function_invoke_attempt index with Streaming CMC (#10009)

### Motivation and Context

During auto function calling, we're yielding all messages back without any indication as to which invocation index they relate to. This information could help the caller understand in which order message chunks were received during the auto function invocation loop. Depending upon the behavior of auto function calling, the `request_index` iterates up to the `maximum_auto_invoke_attempts`. Today the caller doesn't know which auto function invoke attempt they're currently on, so simply handing back all yielded messages can be confusing. In a new PR, we will handle adding the `request_index` (perhaps with a different name) to make it easier to know which streaming message chunks to concatenate, which should help reduce the confusion down the line.

### Description

This PR adds:
- The `function_invoke_attempt` attribute to the `StreamingChatMessageContent` class. This can help callers/users track which streaming chat message chunks belong to which auto function invocation attempt.
- A new keyword argument on `_inner_get_streaming_chat_message_contents` that allows the `function_invoke_attempt` int to be passed through to the `StreamingChatMessageContent` creation in each AI service. This **additive** keyword argument should not break existing implementations.
- Updated unit tests.
- Four new samples showcasing auto function calling: streaming auto invoke / manual invoke (print tool calls), and non-streaming auto invoke / manual invoke (print tool calls). These samples allow one to specify an AI service that supports function calling, as listed in the samples.
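
For illustration, here is a minimal sketch of how a caller might use the new `function_invoke_attempt` index to group streamed chunks per auto invocation attempt. It is modeled on the new streaming samples in this PR; the OpenAI service, the `TimePlugin`, and the prompt are illustrative assumptions, not part of this change:

```python
# Minimal sketch: group streamed chunks by the auto function invocation attempt they belong to.
# Assumes an OpenAI chat service configured via environment variables (illustrative only).
import asyncio
from collections import defaultdict
from functools import reduce

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAIChatPromptExecutionSettings
from semantic_kernel.contents import ChatHistory
from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
from semantic_kernel.contents.utils.author_role import AuthorRole
from semantic_kernel.core_plugins.time_plugin import TimePlugin


async def main() -> None:
    kernel = Kernel()
    chat_service = OpenAIChatCompletion(service_id="chat")
    kernel.add_service(chat_service)
    kernel.add_plugin(TimePlugin(), plugin_name="time")

    settings = OpenAIChatPromptExecutionSettings(
        service_id="chat",
        function_choice_behavior=FunctionChoiceBehavior.Auto(),
    )

    history = ChatHistory()
    history.add_user_message("What time is it? Answer briefly.")

    # Collect assistant chunks keyed by the attempt index they were produced in.
    chunks_by_attempt: dict[int, list[StreamingChatMessageContent]] = defaultdict(list)
    async for chunks in chat_service.get_streaming_chat_message_contents(
        chat_history=history, settings=settings, kernel=kernel
    ):
        for chunk in chunks:
            if chunk.role == AuthorRole.ASSISTANT:
                chunks_by_attempt[chunk.function_invoke_attempt].append(chunk)

    # Concatenate the chunks of each attempt into a single message and print it.
    for attempt, attempt_chunks in sorted(chunks_by_attempt.items()):
        combined = reduce(lambda first, second: first + second, attempt_chunks)
        print(f"[function_invoke_attempt {attempt}] {combined.content}")


if __name__ == "__main__":
    asyncio.run(main())
```

Because every attempt's chunks carry the same index, the caller knows exactly which chunks to concatenate together, rather than guessing from the order in which they arrive.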
- Closes #10006 ### Contribution Checklist - [X] The code builds clean without any errors or warnings - [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [X] All unit tests pass, and I have added new tests where possible - [X] I didn't break anyone :smile: --- .../anthropic_api_function_calling.py | 206 ----------------- ...t_completion_with_auto_function_calling.py | 125 ++++++++++ ...on_with_auto_function_calling_streaming.py | 129 +++++++++++ ...completion_with_manual_function_calling.py | 166 ++++++++++++++ ..._with_manual_function_calling_streaming.py | 181 +++++++++++++++ .../chat_gpt_api_function_calling.py | 207 ----------------- .../chat_mistral_ai_api_function_calling.py | 215 ------------------ .../services/anthropic_chat_completion.py | 13 +- .../azure_ai_inference_chat_completion.py | 7 +- .../services/bedrock_chat_completion.py | 8 +- .../ai/chat_completion_client_base.py | 13 +- .../connectors/ai/function_calling_utils.py | 12 +- .../services/google_ai_chat_completion.py | 9 +- .../services/vertex_ai_chat_completion.py | 9 +- .../services/mistral_ai_chat_completion.py | 7 +- .../ollama/services/ollama_chat_completion.py | 15 +- .../services/onnx_gen_ai_chat_completion.py | 9 +- .../open_ai/services/azure_chat_completion.py | 3 +- .../services/open_ai_chat_completion_base.py | 6 +- .../streaming_chat_message_content.py | 36 ++- python/tests/conftest.py | 10 + python/tests/samples/test_concepts.py | 8 +- .../openapi_plugin/test_sk_openapi.py | 10 +- .../unit/functions/test_kernel_plugins.py | 8 +- python/tests/unit/kernel/test_kernel.py | 2 +- 25 files changed, 745 insertions(+), 669 deletions(-) delete mode 100644 python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py create mode 100644 python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling.py create mode 100644 python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling_streaming.py create mode 100644 python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py create mode 100644 python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling_streaming.py delete mode 100644 python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py delete mode 100644 python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py diff --git a/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py b/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py deleted file mode 100644 index 5769943157db..000000000000 --- a/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. 
- -import asyncio -import os -from functools import reduce -from typing import TYPE_CHECKING - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.anthropic import AnthropicChatCompletion, AnthropicChatPromptExecutionSettings -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior -from semantic_kernel.contents import ChatHistory -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.function_call_content import FunctionCallContent -from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.core_plugins.math_plugin import MathPlugin -from semantic_kernel.core_plugins.time_plugin import TimePlugin -from semantic_kernel.functions import KernelArguments - -if TYPE_CHECKING: - from semantic_kernel.functions import KernelFunction - - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. You are also a math wizard, -especially for adding and subtracting. -You also excel at joke telling, where your tone is often sarcastic. -Once you have the answer I am looking for, -you will return a full answer to me as soon as possible. -""" - -# This concept example shows how to handle both streaming and non-streaming responses -# To toggle the behavior, set the following flag accordingly: -stream = False - -kernel = Kernel() - -# Note: the underlying model needs to support function calling. -# https://docs.anthropic.com/en/docs/build-with-claude/tool-use#choosing-a-model -kernel.add_service(AnthropicChatCompletion(service_id="chat", ai_model_id="claude-3-opus-20240229")) - -plugins_directory = os.path.join(__file__, "../../../../../prompt_template_samples/") -# adding plugins to the kernel -kernel.add_plugin(MathPlugin(), plugin_name="math") -kernel.add_plugin(TimePlugin(), plugin_name="time") - -chat_function = kernel.add_function( - prompt="{{$chat_history}}{{$user_input}}", - plugin_name="ChatBot", - function_name="Chat", -) - -# Enabling or disabling function calling is done by setting the `function_choice_behavior` attribute for the -# prompt execution settings. When the function_call parameter is set to "auto" the model will decide which -# function to use, if any. -# -# There are two ways to define the `function_choice_behavior` parameter: -# 1. Using the type string as `"auto"` or `"required"`. For example: -# configure `function_choice_behavior="auto"` parameter directly in the execution settings. -# 2. Using the FunctionChoiceBehavior class. For example: -# `function_choice_behavior=FunctionChoiceBehavior.Auto()`. -# Both of these configure the `auto` tool_choice and all of the available plugins/functions -# registered on the kernel. If you want to limit the available plugins/functions, you must -# configure the `filters` dictionary attribute for each type of function choice behavior. 
-# For example: -# -# from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior - -# function_choice_behavior = FunctionChoiceBehavior.Auto( -# filters={"included_functions": ["time-date", "time-time", "math-Add"]} -# ) -# -# The filters attribute allows you to specify either: `included_functions`, `excluded_functions`, -# `included_plugins`, or `excluded_plugins`. - -execution_settings = AnthropicChatPromptExecutionSettings( - service_id="chat", - max_tokens=2000, - temperature=0.7, - top_p=0.8, - function_choice_behavior=FunctionChoiceBehavior.Auto(auto_invoke=True), -) - -history = ChatHistory() - -history.add_system_message(system_message) -history.add_user_message("Hi there, who are you?") -history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") - -arguments = KernelArguments(settings=execution_settings) - - -def print_tool_calls(message: ChatMessageContent) -> None: - # A helper method to pretty print the tool calls from the message. - # This is only triggered if auto invoke tool calls is disabled. - items = message.items - formatted_tool_calls = [] - for i, item in enumerate(items, start=1): - if isinstance(item, FunctionCallContent): - tool_call_id = item.id - function_name = item.name - function_arguments = item.arguments - formatted_str = ( - f"tool_call {i} id: {tool_call_id}\n" - f"tool_call {i} function name: {function_name}\n" - f"tool_call {i} arguments: {function_arguments}" - ) - formatted_tool_calls.append(formatted_str) - if len(formatted_tool_calls) > 0: - print("Tool calls:\n" + "\n\n".join(formatted_tool_calls)) - else: - print("The model used its own knowledge and didn't return any tool calls.") - - -async def handle_streaming( - kernel: Kernel, - chat_function: "KernelFunction", - arguments: KernelArguments, -) -> str | None: - response = kernel.invoke_stream( - chat_function, - return_function_results=False, - arguments=arguments, - ) - - print("Mosscap:> ", end="") - streamed_chunks: list[StreamingChatMessageContent] = [] - result_content = [] - async for message in response: - if ( - not execution_settings.function_choice_behavior.auto_invoke_kernel_functions - and isinstance(message[0], StreamingChatMessageContent) - and message[0].role == AuthorRole.ASSISTANT - ): - streamed_chunks.append(message[0]) - elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: - result_content.append(message[0]) - print(str(message[0]), end="") - - if streamed_chunks: - streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) - if hasattr(streaming_chat_message, "content"): - print(streaming_chat_message.content) - print("Auto tool calls is disabled, printing returned tool calls...") - print_tool_calls(streaming_chat_message) - - print("\n") - if result_content: - return "".join([str(content) for content in result_content]) - return None - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - arguments["user_input"] = user_input - arguments["chat_history"] = history - - if stream: - result = await handle_streaming(kernel, chat_function, arguments=arguments) - else: - result = await kernel.invoke(chat_function, arguments=arguments) - - # If tools are used, and auto invoke tool calls is False, the response 
will be of type - # ChatMessageContent with information about the tool calls, which need to be sent - # back to the model to get the final response. - function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)] - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and len(function_calls) > 0: - print_tool_calls(result.value[0]) - return True - - print(f"Mosscap:> {result}") - - history.add_user_message(user_input) - history.add_assistant_message(str(result)) - return True - - -async def main() -> None: - chatting = True - print( - "Welcome to the chat bot!\ - \n Type 'exit' to exit.\ - \n Try a math question to see the function calling in action (i.e. what is 3+3?)." - ) - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling.py b/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling.py new file mode 100644 index 000000000000..c74ebc322489 --- /dev/null +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling.py @@ -0,0 +1,125 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio +from typing import TYPE_CHECKING + +from samples.concepts.setup.chat_completion_services import Services, get_chat_completion_service_and_request_settings +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.contents import ChatHistory +from semantic_kernel.core_plugins.math_plugin import MathPlugin +from semantic_kernel.core_plugins.time_plugin import TimePlugin +from semantic_kernel.functions import KernelArguments + +if TYPE_CHECKING: + pass + +##################################################################### +# This sample demonstrates how to build a conversational chatbot # +# using Semantic Kernel, featuring auto function calling, # +# non-streaming responses, and support for math and time plugins. # +# The chatbot is designed to interact with the user, call functions # +# as needed, and return responses. # +##################################################################### + +# System message defining the behavior and persona of the chat bot. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. You are also a math wizard, +especially for adding and subtracting. +You also excel at joke telling, where your tone is often sarcastic. +Once you have the answer I am looking for, +you will return a full answer to me as soon as possible. +""" + +# Create and configure the kernel. +kernel = Kernel() + +# Load some sample plugins (for demonstration of function calling). +kernel.add_plugin(MathPlugin(), plugin_name="math") +kernel.add_plugin(TimePlugin(), plugin_name="time") + +# Define a chat function (a template for how to handle user input). 
+chat_function = kernel.add_function( + prompt="{{$chat_history}}{{$user_input}}", + plugin_name="ChatBot", + function_name="Chat", +) + +# You can select from the following chat completion services that support function calling: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# Configure the function choice behavior. Here, we set it to Auto, where auto_invoke=True by default. +# With `auto_invoke=True`, the model will automatically choose and call functions as needed. +request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto() + +kernel.add_service(chat_completion_service) + +# Pass the request settings to the kernel arguments. +arguments = KernelArguments(settings=request_settings) + +# Create a chat history to store the system message, initial messages, and the conversation. +history = ChatHistory() +history.add_system_message(system_message) +history.add_user_message("Hi there, who are you?") +history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") + + +async def chat() -> bool: + """ + Continuously prompt the user for input and show the assistant's response. + Type 'exit' to exit. + """ + try: + user_input = input("User:> ") + except (KeyboardInterrupt, EOFError): + print("\n\nExiting chat...") + return False + + if user_input.lower().strip() == "exit": + print("\n\nExiting chat...") + return False + + arguments["user_input"] = user_input + arguments["chat_history"] = history + + # Handle non-streaming responses + result = await kernel.invoke(chat_function, arguments=arguments) + + # Update the chat history with the user's input and the assistant's response + if result: + print(f"Mosscap:> {result}") + history.add_user_message(user_input) + history.add_message(result.value[0]) # Capture the full context of the response + + return True + + +async def main() -> None: + print( + "Welcome to the chat bot!\n" + " Type 'exit' to exit.\n" + " Try a math question to see function calling in action (e.g. 'what is 3+3?')." + ) + chatting = True + while chatting: + chatting = await chat() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling_streaming.py b/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling_streaming.py new file mode 100644 index 000000000000..f7aa767ffa23 --- /dev/null +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling_streaming.py @@ -0,0 +1,129 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +import asyncio + +from samples.concepts.setup.chat_completion_services import Services, get_chat_completion_service_and_request_settings +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.contents import ChatHistory +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole +from semantic_kernel.core_plugins.math_plugin import MathPlugin +from semantic_kernel.core_plugins.time_plugin import TimePlugin +from semantic_kernel.functions import KernelArguments + +##################################################################### +# This sample demonstrates how to build a conversational chatbot # +# using Semantic Kernel, featuring auto function calling, # +# streaming responses, and support for math and time plugins. # +# The chatbot is designed to interact with the user, call functions # +# as needed, and return responses. # +##################################################################### + +# System message defining the behavior and persona of the chat bot. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. You are also a math wizard, +especially for adding and subtracting. +You also excel at joke telling, where your tone is often sarcastic. +Once you have the answer I am looking for, +you will return a full answer to me as soon as possible. +""" + +# Create and configure the kernel. +kernel = Kernel() + +# Load some sample plugins (for demonstration of function calling). +kernel.add_plugin(MathPlugin(), plugin_name="math") +kernel.add_plugin(TimePlugin(), plugin_name="time") + +# Define a chat function (a template for how to handle user input). +chat_function = kernel.add_function( + prompt="{{$chat_history}}{{$user_input}}", + plugin_name="ChatBot", + function_name="Chat", +) + +# You can select from the following chat completion services that support function calling: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# Configure the function choice behavior. Here, we set it to Auto. +request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto() + +kernel.add_service(chat_completion_service) + +# Pass the request settings to the kernel arguments. +arguments = KernelArguments(settings=request_settings) + +# Create a chat history to store the system message, initial messages, and the conversation. +history = ChatHistory() +history.add_system_message(system_message) +history.add_user_message("Hi there, who are you?") +history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") + + +async def main() -> None: + print( + "Welcome to the chat bot!\n" + " Type 'exit' to exit.\n" + " Try a math question to see function calling in action (e.g. 'what is 3+3?')." 
+ ) + + while True: + try: + user_input = input("User:> ") + except (KeyboardInterrupt, EOFError): + print("\n\nExiting chat...") + break + + if user_input.lower().strip() == "exit": + print("\n\nExiting chat...") + break + + arguments["user_input"] = user_input + arguments["chat_history"] = history + + # Directly handle streaming of the assistant's response here + print("Mosscap:> ", end="", flush=True) + + streamed_response_chunks: list[StreamingChatMessageContent] = [] + + async for message in kernel.invoke_stream( + chat_function, + return_function_results=False, + arguments=arguments, + ): + msg = message[0] + + # We only expect assistant messages here. + if not isinstance(msg, StreamingChatMessageContent) or msg.role != AuthorRole.ASSISTANT: + continue + + streamed_response_chunks.append(msg) + print(str(msg), end="", flush=True) + + print("\n", flush=True) + + if streamed_response_chunks: + result = "".join([str(content) for content in streamed_response_chunks]) + history.add_user_message(user_input) + history.add_assistant_message(result) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py new file mode 100644 index 000000000000..162c415c4a64 --- /dev/null +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py @@ -0,0 +1,166 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio +from typing import TYPE_CHECKING + +from samples.concepts.setup.chat_completion_services import Services, get_chat_completion_service_and_request_settings +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.contents import ChatHistory +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.function_call_content import FunctionCallContent +from semantic_kernel.core_plugins.math_plugin import MathPlugin +from semantic_kernel.core_plugins.time_plugin import TimePlugin +from semantic_kernel.functions import KernelArguments + +if TYPE_CHECKING: + pass + +##################################################################### +# This sample demonstrates how to build a conversational chatbot # +# using Semantic Kernel, featuring manual function calling, # +# non-streaming responses, and support for math and time plugins. # +# The chatbot is designed to interact with the user, call functions # +# as needed, and return responses. With auto function calling # +# disabled, the tool calls will be printed to the console. # +##################################################################### + +# System message defining the behavior and persona of the chat bot. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. You are also a math wizard, +especially for adding and subtracting. +You also excel at joke telling, where your tone is often sarcastic. +Once you have the answer I am looking for, +you will return a full answer to me as soon as possible. +""" + +# Create and configure the kernel. +kernel = Kernel() + +# Load some sample plugins (for demonstration of function calling). 
+kernel.add_plugin(MathPlugin(), plugin_name="math") +kernel.add_plugin(TimePlugin(), plugin_name="time") + +# Define a chat function (a template for how to handle user input). +chat_function = kernel.add_function( + prompt="{{$chat_history}}{{$user_input}}", + plugin_name="ChatBot", + function_name="Chat", +) + +# You can select from the following chat completion services that support function calling: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# Configure the function choice behavior. Here, we set it to Auto, where auto_invoke=False. +# With `FunctionChoiceBehavior(auto_invoke=False)`, the model may return tool call instructions +# that you must handle and call manually. We will only print the tool calls in this sample. +request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=False) + +kernel.add_service(chat_completion_service) + +# Pass the request settings to the kernel arguments. +arguments = KernelArguments(settings=request_settings) + +# Create a chat history to store the system message, initial messages, and the conversation. +history = ChatHistory() +history.add_system_message(system_message) +history.add_user_message("Hi there, who are you?") +history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") + + +def print_tool_calls(message: ChatMessageContent) -> None: + """ + A helper function to pretty print the tool calls found in a ChatMessageContent message. + This is useful when auto tool invocation is disabled and the model returns calls that you must handle. + """ + items = message.items + formatted_tool_calls = [] + for i, item in enumerate(items, start=1): + if isinstance(item, FunctionCallContent): + tool_call_id = item.id + function_name = item.name + function_arguments = item.arguments + formatted_str = ( + f"tool_call {i} id: {tool_call_id}\n" + f"tool_call {i} function name: {function_name}\n" + f"tool_call {i} arguments: {function_arguments}" + ) + formatted_tool_calls.append(formatted_str) + if len(formatted_tool_calls) > 0: + print("\n[Tool calls returned by the model]:\n" + "\n\n".join(formatted_tool_calls)) + else: + print("\n[No tool calls returned by the model]") + + +async def chat() -> bool: + """ + Continuously prompt the user for input and show the assistant's response. + Type 'exit' to exit. + """ + try: + user_input = input("User:> ") + except (KeyboardInterrupt, EOFError): + print("\n\nExiting chat...") + return False + + if user_input.lower().strip() == "exit": + print("\n\nExiting chat...") + return False + + arguments["user_input"] = user_input + arguments["chat_history"] = history + + # Handle non-streaming responses + result = await kernel.invoke(chat_function, arguments=arguments) + + # If function calls are returned, we show them on the console. + if result and result.value: + # Extract function calls from the returned content + function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)] + if len(function_calls) > 0: + print_tool_calls(result.value[0]) + # At this point, you'd handle these calls manually if desired. 
+ # For now, we just print them. + return True + + # If no function calls to handle, just print the assistant's response + if result: + print(f"Mosscap:> {result}") + + # Update the chat history with the user's input and the assistant's response + if result: + history.add_user_message(user_input) + history.add_assistant_message(str(result)) + + return True + + +async def main() -> None: + print( + "Welcome to the chat bot!\n" + " Type 'exit' to exit.\n" + " Try a math question to see function calling in action (e.g. 'what is 3+3?')." + ) + chatting = True + while chatting: + chatting = await chat() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling_streaming.py b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling_streaming.py new file mode 100644 index 000000000000..360c0d670f45 --- /dev/null +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling_streaming.py @@ -0,0 +1,181 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio +from functools import reduce +from typing import TYPE_CHECKING + +from samples.concepts.setup.chat_completion_services import Services, get_chat_completion_service_and_request_settings +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.contents import ChatHistory +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.function_call_content import FunctionCallContent +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole +from semantic_kernel.core_plugins.math_plugin import MathPlugin +from semantic_kernel.core_plugins.time_plugin import TimePlugin +from semantic_kernel.functions import KernelArguments + +if TYPE_CHECKING: + pass + +##################################################################### +# This sample demonstrates how to build a conversational chatbot # +# using Semantic Kernel, featuring dynamic function calling, # +# streaming responses, and support for math and time plugins. # +# The chatbot is designed to interact with the user, call functions # +# as needed, and return responses. If auto function calling is # +# disabled, then the tool calls will be printed to the console. # +##################################################################### + +# System message defining the behavior and persona of the chat bot. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. You are also a math wizard, +especially for adding and subtracting. +You also excel at joke telling, where your tone is often sarcastic. +Once you have the answer I am looking for, +you will return a full answer to me as soon as possible. +""" + +# Create and configure the kernel. +kernel = Kernel() + +# Load some sample plugins (for demonstration of function calling). +kernel.add_plugin(MathPlugin(), plugin_name="math") +kernel.add_plugin(TimePlugin(), plugin_name="time") + +# Define a chat function (a template for how to handle user input). 
+chat_function = kernel.add_function( + prompt="{{$chat_history}}{{$user_input}}", + plugin_name="ChatBot", + function_name="Chat", +) + +# Configure the chat completion service and request settings. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# Configure the function choice behavior to Auto with auto_invoke=False. +# This means the model may return tool calls that must be manually handled. +request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=False) +kernel.add_service(chat_completion_service) + +# Pass the request settings to the kernel arguments. +arguments = KernelArguments(settings=request_settings) + +# Create a chat history to store the system message, initial messages, and the conversation. +history = ChatHistory() +history.add_system_message(system_message) +history.add_user_message("Hi there, who are you?") +history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") + + +def print_tool_calls(message: ChatMessageContent) -> None: + """ + A helper function to pretty print the tool calls found in a ChatMessageContent message. + This is useful when auto tool invocation is disabled and the model returns calls that you must handle. + """ + items = message.items + formatted_tool_calls = [] + for i, item in enumerate(items, start=1): + if isinstance(item, FunctionCallContent): + tool_call_id = item.id + function_name = item.name + function_arguments = item.arguments + formatted_str = ( + f"tool_call {i} id: {tool_call_id}\n" + f"tool_call {i} function name: {function_name}\n" + f"tool_call {i} arguments: {function_arguments}" + ) + formatted_tool_calls.append(formatted_str) + if len(formatted_tool_calls) > 0: + print("\n[Tool calls returned by the model]:\n" + "\n\n".join(formatted_tool_calls)) + else: + print("\n[No tool calls returned by the model]") + + +async def main() -> None: + print( + "Welcome to the chat bot!\n" + " Type 'exit' to exit.\n" + " Try a math question to see function calling in action (e.g. 'what is 3+3?')." + ) + + while True: + # Get user input + try: + user_input = input("User:> ") + except (KeyboardInterrupt, EOFError): + print("\n\nExiting chat...") + break + + if user_input.lower().strip() == "exit": + print("\n\nExiting chat...") + break + + # Prepare arguments for the model invocation + arguments["user_input"] = user_input + arguments["chat_history"] = history + + print("Mosscap:> ", end="", flush=True) + + # Lists to store streamed chunks + streamed_tool_chunks: list[StreamingChatMessageContent] = [] + streamed_response_chunks: list[StreamingChatMessageContent] = [] + + async for message in kernel.invoke_stream( + chat_function, + return_function_results=False, + arguments=arguments, + ): + msg = message[0] + + # Expecting assistant messages only + if not isinstance(msg, StreamingChatMessageContent) or msg.role != AuthorRole.ASSISTANT: + continue + + # If auto_invoking is False, the model may send tool calls in separate chunks. 
+ if hasattr(msg, "function_invoke_attempt"): + # This chunk is part of a tool call instruction + streamed_tool_chunks.append(msg) + else: + # Normal assistant response text + streamed_response_chunks.append(msg) + print(str(msg), end="", flush=True) + + print("\n", flush=True) + + # If we have tool call instructions + if streamed_tool_chunks: + # Group streamed tool chunks by `function_invoke_attempt` + grouped_chunks = {} + for chunk in streamed_tool_chunks: + key = getattr(chunk, "function_invoke_attempt", None) + if key is not None: + grouped_chunks.setdefault(key, []).append(chunk) + + # Process each group of chunks + for attempt, chunks in grouped_chunks.items(): + try: + combined_content = reduce(lambda first, second: first + second, chunks) + if hasattr(combined_content, "content"): + print(f"[function_invoke_attempt {attempt} content]:\n{combined_content.content}") + + print("[Auto function calling is OFF] Here are the returned tool calls:") + print_tool_calls(combined_content) + except Exception as e: + print(f"Error processing chunks for function_invoke_attempt {attempt}: {e}") + + # Update the chat history with user input and assistant response, if any + if streamed_response_chunks: + result = "".join([str(content) for content in streamed_response_chunks]) + history.add_user_message(user_input) + history.add_assistant_message(str(result)) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py b/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py deleted file mode 100644 index 2ced79d2f8be..000000000000 --- a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -import os -from functools import reduce -from typing import TYPE_CHECKING - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior -from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAIChatPromptExecutionSettings -from semantic_kernel.contents import ChatHistory -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.function_call_content import FunctionCallContent -from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.core_plugins.math_plugin import MathPlugin -from semantic_kernel.core_plugins.time_plugin import TimePlugin -from semantic_kernel.functions import KernelArguments - -if TYPE_CHECKING: - from semantic_kernel.functions import KernelFunction - - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. You are also a math wizard, -especially for adding and subtracting. -You also excel at joke telling, where your tone is often sarcastic. -Once you have the answer I am looking for, -you will return a full answer to me as soon as possible. 
-""" - -# This concept example shows how to handle both streaming and non-streaming responses -# To toggle the behavior, set the following flag accordingly: -stream = True - -kernel = Kernel() - -# Note: the underlying gpt-35/gpt-4 model version needs to be at least version 0613 to support tools. -kernel.add_service(OpenAIChatCompletion(service_id="chat")) - -plugins_directory = os.path.join(__file__, "../../../../../prompt_template_samples/") -# adding plugins to the kernel -kernel.add_plugin(MathPlugin(), plugin_name="math") -kernel.add_plugin(TimePlugin(), plugin_name="time") - -chat_function = kernel.add_function( - prompt="{{$chat_history}}{{$user_input}}", - plugin_name="ChatBot", - function_name="Chat", -) - -# Enabling or disabling function calling is done by setting the `function_choice_behavior` attribute for the -# prompt execution settings. When the function_call parameter is set to "auto" the model will decide which -# function to use, if any. -# -# There are two ways to define the `function_choice_behavior` parameter: -# 1. Using the type string as `"auto"`, `"required"`, or `"none"`. For example: -# configure `function_choice_behavior="auto"` parameter directly in the execution settings. -# 2. Using the FunctionChoiceBehavior class. For example: -# `function_choice_behavior=FunctionChoiceBehavior.Auto()`. -# Both of these configure the `auto` tool_choice and all of the available plugins/functions -# registered on the kernel. If you want to limit the available plugins/functions, you must -# configure the `filters` dictionary attribute for each type of function choice behavior. -# For example: -# -# from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior - -# function_choice_behavior = FunctionChoiceBehavior.Auto( -# filters={"included_functions": ["time-date", "time-time", "math-Add"]} -# ) -# -# The filters attribute allows you to specify either: `included_functions`, `excluded_functions`, -# `included_plugins`, or `excluded_plugins`. - -# Note: the number of responses for auto invoking tool calls is limited to 1. -# If configured to be greater than one, this value will be overridden to 1. -execution_settings = OpenAIChatPromptExecutionSettings( - service_id="chat", - max_tokens=2000, - temperature=0.7, - top_p=0.8, - function_choice_behavior=FunctionChoiceBehavior.Auto(auto_invoke=True), -) - -history = ChatHistory() - -history.add_system_message(system_message) -history.add_user_message("Hi there, who are you?") -history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") - -arguments = KernelArguments(settings=execution_settings) - - -def print_tool_calls(message: ChatMessageContent) -> None: - # A helper method to pretty print the tool calls from the message. - # This is only triggered if auto invoke tool calls is disabled. 
- items = message.items - formatted_tool_calls = [] - for i, item in enumerate(items, start=1): - if isinstance(item, FunctionCallContent): - tool_call_id = item.id - function_name = item.name - function_arguments = item.arguments - formatted_str = ( - f"tool_call {i} id: {tool_call_id}\n" - f"tool_call {i} function name: {function_name}\n" - f"tool_call {i} arguments: {function_arguments}" - ) - formatted_tool_calls.append(formatted_str) - if len(formatted_tool_calls) > 0: - print("Tool calls:\n" + "\n\n".join(formatted_tool_calls)) - else: - print("The model used its own knowledge and didn't return any tool calls.") - - -async def handle_streaming( - kernel: Kernel, - chat_function: "KernelFunction", - arguments: KernelArguments, -) -> str | None: - response = kernel.invoke_stream( - chat_function, - return_function_results=False, - arguments=arguments, - ) - - print("Mosscap:> ", end="") - streamed_chunks: list[StreamingChatMessageContent] = [] - result_content: list[StreamingChatMessageContent] = [] - async for message in response: - if ( - not execution_settings.function_choice_behavior.auto_invoke_kernel_functions - and isinstance(message[0], StreamingChatMessageContent) - and message[0].role == AuthorRole.ASSISTANT - ): - streamed_chunks.append(message[0]) - elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: - result_content.append(message[0]) - print(str(message[0]), end="") - - if streamed_chunks: - streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) - if hasattr(streaming_chat_message, "content"): - print(streaming_chat_message.content) - print("Auto tool calls is disabled, printing returned tool calls...") - print_tool_calls(streaming_chat_message) - - print("\n") - if result_content: - return "".join([str(content) for content in result_content]) - return None - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - arguments["user_input"] = user_input - arguments["chat_history"] = history - - if stream: - result = await handle_streaming(kernel, chat_function, arguments=arguments) - else: - result = await kernel.invoke(chat_function, arguments=arguments) - - # If tools are used, and auto invoke tool calls is False, the response will be of type - # ChatMessageContent with information about the tool calls, which need to be sent - # back to the model to get the final response. - function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)] - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and len(function_calls) > 0: - print_tool_calls(result.value[0]) - return True - - print(f"Mosscap:> {result}") - - history.add_user_message(user_input) - history.add_assistant_message(str(result)) - return True - - -async def main() -> None: - chatting = True - print( - "Welcome to the chat bot!\ - \n Type 'exit' to exit.\ - \n Try a math question to see the function calling in action (i.e. what is 3+3?)." 
- ) - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py b/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py deleted file mode 100644 index 5ee05a835e2a..000000000000 --- a/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py +++ /dev/null @@ -1,215 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -import os -from functools import reduce -from typing import TYPE_CHECKING - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior -from semantic_kernel.connectors.ai.mistral_ai import MistralAIChatCompletion, MistralAIChatPromptExecutionSettings -from semantic_kernel.contents import ChatHistory -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.function_call_content import FunctionCallContent -from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.core_plugins.math_plugin import MathPlugin -from semantic_kernel.core_plugins.time_plugin import TimePlugin -from semantic_kernel.functions import KernelArguments - -if TYPE_CHECKING: - from semantic_kernel.functions import KernelFunction - - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. You are also a math wizard, -especially for adding and subtracting. -You also excel at joke telling, where your tone is often sarcastic. -Once you have the answer I am looking for, -you will return a full answer to me as soon as possible. -""" - -# This concept example shows how to handle both streaming and non-streaming responses -# To toggle the behavior, set the following flag accordingly: -stream = True - -kernel = Kernel() - -# Note: the underlying Model must be Mistral Small, Mistral Large, Mixtral 8x22B, Mistral Nemo. -# You can use MISTRALAI_API_KEY and MISTRALAI_CHAT_MODEL_ID environment variables to set the API key and model ID. -# Or just set it here in the Constructor for testing -kernel.add_service( - MistralAIChatCompletion( - service_id="chat", - # api_key=XXXXXXX, - # ai_model_id="mistral-large", - ) -) - -plugins_directory = os.path.join(__file__, "../../../../../prompt_template_samples/") -# adding plugins to the kernel -kernel.add_plugin(MathPlugin(), plugin_name="math") -kernel.add_plugin(TimePlugin(), plugin_name="time") - -chat_function = kernel.add_function( - prompt="{{$chat_history}}{{$user_input}}", - plugin_name="ChatBot", - function_name="Chat", -) - -# Enabling or disabling function calling is done by setting the `function_choice_behavior` attribute for the -# prompt execution settings. When the function_call parameter is set to "auto" the model will decide which -# function to use, if any. -# -# There are two ways to define the `function_choice_behavior` parameter: -# 1. Using the type string as `"auto"`, `"required"`, or `"none"`. For example: -# configure `function_choice_behavior="auto"` parameter directly in the execution settings. -# 2. Using the FunctionChoiceBehavior class. 
For example: -# `function_choice_behavior=FunctionChoiceBehavior.Auto()`. -# Both of these configure the `auto` tool_choice and all of the available plugins/functions -# registered on the kernel. If you want to limit the available plugins/functions, you must -# configure the `filters` dictionary attribute for each type of function choice behavior. -# For example: -# -# from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior - -# function_choice_behavior = FunctionChoiceBehavior.Auto( -# filters={"included_functions": ["time-date", "time-time", "math-Add"]} -# ) -# -# The filters attribute allows you to specify either: `included_functions`, `excluded_functions`, -# `included_plugins`, or `excluded_plugins`. - -# Note: the number of responses for auto invoking tool calls is limited to 1. -# If configured to be greater than one, this value will be overridden to 1. -execution_settings = MistralAIChatPromptExecutionSettings( - service_id="chat", - max_tokens=2000, - temperature=0.7, - top_p=0.8, - function_choice_behavior=FunctionChoiceBehavior.Auto(auto_invoke=True), -) - -history = ChatHistory() - -history.add_system_message(system_message) -history.add_user_message("Hi there, who are you?") -history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") - -arguments = KernelArguments(settings=execution_settings) - - -def print_tool_calls(message: ChatMessageContent) -> None: - # A helper method to pretty print the tool calls from the message. - # This is only triggered if auto invoke tool calls is disabled. - items = message.items - formatted_tool_calls = [] - for i, item in enumerate(items, start=1): - if isinstance(item, FunctionCallContent): - tool_call_id = item.id - function_name = item.name - function_arguments = item.arguments - formatted_str = ( - f"tool_call {i} id: {tool_call_id}\n" - f"tool_call {i} function name: {function_name}\n" - f"tool_call {i} arguments: {function_arguments}" - ) - formatted_tool_calls.append(formatted_str) - if len(formatted_tool_calls) > 0: - print("Tool calls:\n" + "\n\n".join(formatted_tool_calls)) - else: - print("The model used its own knowledge and didn't return any tool calls.") - - -async def handle_streaming( - kernel: Kernel, - chat_function: "KernelFunction", - arguments: KernelArguments, -) -> str | None: - response = kernel.invoke_stream( - chat_function, - return_function_results=False, - arguments=arguments, - ) - - print("Mosscap:> ", end="") - streamed_chunks: list[StreamingChatMessageContent] = [] - result_content = [] - async for message in response: - if ( - not execution_settings.function_choice_behavior.auto_invoke_kernel_functions - and isinstance(message[0], StreamingChatMessageContent) - and message[0].role == AuthorRole.ASSISTANT - ): - streamed_chunks.append(message[0]) - elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: - result_content.append(message[0]) - print(str(message[0]), end="") - - if streamed_chunks: - streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) - if hasattr(streaming_chat_message, "content"): - print(streaming_chat_message.content) - print("Auto tool calls is disabled, printing returned tool calls...") - print_tool_calls(streaming_chat_message) - - print("\n") - if result_content: - return "".join([str(content) for content in result_content]) - return None - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: 
- print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - arguments["user_input"] = user_input - arguments["chat_history"] = history - - if stream: - result = await handle_streaming(kernel, chat_function, arguments=arguments) - else: - result = await kernel.invoke(chat_function, arguments=arguments) - - # If tools are used, and auto invoke tool calls is False, the response will be of type - # ChatMessageContent with information about the tool calls, which need to be sent - # back to the model to get the final response. - function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)] - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and len(function_calls) > 0: - print_tool_calls(result.value[0]) - return True - - print(f"Mosscap:> {result}") - - history.add_user_message(user_input) - history.add_assistant_message(str(result)) - return True - - -async def main() -> None: - chatting = True - print( - "Welcome to the chat bot!\ - \n Type 'exit' to exit.\ - \n Try a math question to see the function calling in action (i.e. what is 3+3?)." - ) - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py b/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py index 4c4c9da92d60..87e967184234 100644 --- a/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py @@ -170,6 +170,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, AnthropicChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -180,7 +181,7 @@ async def _inner_get_streaming_chat_message_contents( if settings.system is None and parsed_system_message is not None: settings.system = parsed_system_message - response = self._send_chat_stream_request(settings) + response = self._send_chat_stream_request(settings, function_invoke_attempt) if not isinstance(response, AsyncGenerator): raise ServiceInvalidResponseError("Expected an AsyncGenerator response.") @@ -282,6 +283,7 @@ def _create_streaming_chat_message_content( self, stream_event: TextEvent | ContentBlockStopEvent | RawMessageDeltaEvent, metadata: dict[str, Any] = {}, + function_invoke_attempt: int = 0, ) -> StreamingChatMessageContent: """Create a streaming chat message content object from a content block.""" items: list[STREAMING_ITEM_TYPES] = [] @@ -315,6 +317,7 @@ def _create_streaming_chat_message_content( role=AuthorRole.ASSISTANT, finish_reason=finish_reason, items=items, + function_invoke_attempt=function_invoke_attempt, ) async def _send_chat_request(self, settings: AnthropicChatPromptExecutionSettings) -> list["ChatMessageContent"]: @@ -334,7 +337,9 @@ async def _send_chat_request(self, settings: AnthropicChatPromptExecutionSetting return [self._create_chat_message_content(response, response_metadata)] async def _send_chat_stream_request( - self, settings: AnthropicChatPromptExecutionSettings + self, + settings: AnthropicChatPromptExecutionSettings, + 
function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], None]: """Send the chat stream request. @@ -355,7 +360,9 @@ async def _send_chat_stream_request( isinstance(stream_event, ContentBlockStopEvent) and stream_event.content_block.type == "tool_use" ): - yield [self._create_streaming_chat_message_content(stream_event, metadata)] + yield [ + self._create_streaming_chat_message_content(stream_event, metadata, function_invoke_attempt) + ] except Exception as ex: raise ServiceResponseException( f"{type(self)} service failed to complete the request", diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py index e48268d223bb..8ac10561f142 100644 --- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py @@ -138,6 +138,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, AzureAIInferenceChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -157,7 +158,8 @@ async def _inner_get_streaming_chat_message_contents( continue chunk_metadata = self._get_metadata_from_response(chunk) yield [ - self._create_streaming_chat_message_content(chunk, choice, chunk_metadata) for choice in chunk.choices + self._create_streaming_chat_message_content(chunk, choice, chunk_metadata, function_invoke_attempt) + for choice in chunk.choices ] @override @@ -255,6 +257,7 @@ def _create_streaming_chat_message_content( chunk: AsyncStreamingChatCompletions, choice: StreamingChatChoiceUpdate, metadata: dict[str, Any], + function_invoke_attempt: int, ) -> StreamingChatMessageContent: """Create a streaming chat message content object. @@ -262,6 +265,7 @@ def _create_streaming_chat_message_content( chunk: The chunk from the response. choice: The choice from the response. metadata: The metadata from the response. + function_invoke_attempt: The function invoke attempt. Returns: A streaming chat message content object. 
@@ -295,6 +299,7 @@ def _create_streaming_chat_message_content( inner_content=chunk, finish_reason=FinishReason(choice.finish_reason) if choice.finish_reason else None, metadata=metadata, + function_invoke_attempt=function_invoke_attempt, ) # endregion diff --git a/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py b/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py index 8827b310ac0d..c163b6ffda74 100644 --- a/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py @@ -128,6 +128,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: # Not all models support streaming: check if the model supports streaming before proceeding model_info = await self.get_foundation_model_info(self.ai_model_id) @@ -146,7 +147,7 @@ async def _inner_get_streaming_chat_message_contents( elif "contentBlockStart" in event: yield [self._parse_content_block_start_event(event)] elif "contentBlockDelta" in event: - yield [self._parse_content_block_delta_event(event)] + yield [self._parse_content_block_delta_event(event, function_invoke_attempt)] elif "contentBlockStop" in event: continue elif "messageStop" in event: @@ -338,7 +339,9 @@ def _parse_content_block_start_event(self, event: dict[str, Any]) -> StreamingCh inner_content=event, ) - def _parse_content_block_delta_event(self, event: dict[str, Any]) -> StreamingChatMessageContent: + def _parse_content_block_delta_event( + self, event: dict[str, Any], function_invoke_attempt: int + ) -> StreamingChatMessageContent: """Parse the content block delta event. The content block delta event contains the completion. @@ -363,6 +366,7 @@ def _parse_content_block_delta_event(self, event: dict[str, Any]) -> StreamingCh items=items, choice_index=0, inner_content=event, + function_invoke_attempt=function_invoke_attempt, ) def _parse_message_stop_event(self, event: dict[str, Any]) -> StreamingChatMessageContent: diff --git a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py index 6a673dccd5eb..de9edf36c268 100644 --- a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py +++ b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py @@ -64,15 +64,17 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: """Send a streaming chat request to the AI service. Args: - chat_history (ChatHistory): The chat history to send. - settings (PromptExecutionSettings): The settings for the request. + chat_history: The chat history to send. + settings: The settings for the request. + function_invoke_attempt: The current attempt count for automatically invoking functions. Yields: - streaming_chat_message_contents (list[StreamingChatMessageContent]): The streaming chat message contents. + streaming_chat_message_contents: The streaming chat message contents. 
""" raise NotImplementedError("The _inner_get_streaming_chat_message_contents method is not implemented.") # Below is needed for mypy: https://mypy.readthedocs.io/en/stable/more_types.html#asynchronous-iterators @@ -268,7 +270,9 @@ async def get_streaming_chat_message_contents( # Hold the messages, if there are more than one response, it will not be used, so we flatten all_messages: list["StreamingChatMessageContent"] = [] function_call_returned = False - async for messages in self._inner_get_streaming_chat_message_contents(chat_history, settings): + async for messages in self._inner_get_streaming_chat_message_contents( + chat_history, settings, request_index + ): for msg in messages: if msg is not None: all_messages.append(msg) @@ -313,6 +317,7 @@ async def get_streaming_chat_message_contents( function_result_messages = merge_streaming_function_results( messages=chat_history.messages[-len(results) :], ai_model_id=ai_model_id, # type: ignore + function_invoke_attempt=request_index, ) if self._yield_function_result_messages(function_result_messages): yield function_result_messages diff --git a/python/semantic_kernel/connectors/ai/function_calling_utils.py b/python/semantic_kernel/connectors/ai/function_calling_utils.py index 365d43565ed9..c7ab3dba6b39 100644 --- a/python/semantic_kernel/connectors/ai/function_calling_utils.py +++ b/python/semantic_kernel/connectors/ai/function_calling_utils.py @@ -101,6 +101,7 @@ def merge_function_results( def merge_streaming_function_results( messages: list[ChatMessageContent | StreamingChatMessageContent], ai_model_id: str, + function_invoke_attempt: int, ) -> list[StreamingChatMessageContent]: """Combine multiple streaming function result content types to one streaming chat message content type. @@ -110,6 +111,7 @@ def merge_streaming_function_results( Args: messages: The list of streaming chat message content types. ai_model_id: The AI model ID. + function_invoke_attempt: The function invoke attempt. Returns: The combined streaming chat message content type. 
@@ -118,4 +120,12 @@ def merge_streaming_function_results( for message in messages: items.extend([item for item in message.items if isinstance(item, FunctionResultContent)]) - return [StreamingChatMessageContent(role=AuthorRole.TOOL, items=items, choice_index=0, ai_model_id=ai_model_id)] + return [ + StreamingChatMessageContent( + role=AuthorRole.TOOL, + items=items, + choice_index=0, + ai_model_id=ai_model_id, + function_invoke_attempt=function_invoke_attempt, + ) + ] diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py index ca4ca998a122..df8f64cf4c6c 100644 --- a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py @@ -147,6 +147,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, GoogleAIChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -167,7 +168,10 @@ async def _inner_get_streaming_chat_message_contents( ) async for chunk in response: - yield [self._create_streaming_chat_message_content(chunk, candidate) for candidate in chunk.candidates] + yield [ + self._create_streaming_chat_message_content(chunk, candidate, function_invoke_attempt) + for candidate in chunk.candidates + ] @override def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") -> None: @@ -268,12 +272,14 @@ def _create_streaming_chat_message_content( self, chunk: GenerateContentResponse, candidate: Candidate, + function_invoke_attempt: int = 0, ) -> StreamingChatMessageContent: """Create a streaming chat message content object. Args: chunk: The response from the service. candidate: The candidate from the response. + function_invoke_attempt: The function invoke attempt. Returns: A streaming chat message content object. 
@@ -313,6 +319,7 @@ def _create_streaming_chat_message_content( inner_content=chunk, finish_reason=finish_reason, metadata=response_metadata, + function_invoke_attempt=function_invoke_attempt, ) # endregion diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py index 45d66396ff34..6372c71c5b1c 100644 --- a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py @@ -142,6 +142,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, VertexAIChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -162,7 +163,10 @@ async def _inner_get_streaming_chat_message_contents( ) async for chunk in response: - yield [self._create_streaming_chat_message_content(chunk, candidate) for candidate in chunk.candidates] + yield [ + self._create_streaming_chat_message_content(chunk, candidate, function_invoke_attempt) + for candidate in chunk.candidates + ] @override def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") -> None: @@ -262,12 +266,14 @@ def _create_streaming_chat_message_content( self, chunk: GenerationResponse, candidate: Candidate, + function_invoke_attempt: int, ) -> StreamingChatMessageContent: """Create a streaming chat message content object. Args: chunk: The response from the service. candidate: The candidate from the response. + function_invoke_attempt: The function invoke attempt. Returns: A streaming chat message content object. 
@@ -308,6 +314,7 @@ def _create_streaming_chat_message_content( inner_content=chunk, finish_reason=finish_reason, metadata=response_metadata, + function_invoke_attempt=function_invoke_attempt, ) # endregion diff --git a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py index 46f0c9f64a2b..b374235225a4 100644 --- a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py @@ -159,6 +159,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, MistralAIChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -182,7 +183,9 @@ async def _inner_get_streaming_chat_message_contents( continue chunk_metadata = self._get_metadata_from_response(chunk.data) yield [ - self._create_streaming_chat_message_content(chunk.data, choice, chunk_metadata) + self._create_streaming_chat_message_content( + chunk.data, choice, chunk_metadata, function_invoke_attempt + ) for choice in chunk.data.choices ] @@ -216,6 +219,7 @@ def _create_streaming_chat_message_content( chunk: CompletionChunk, choice: CompletionResponseStreamChoice, chunk_metadata: dict[str, Any], + function_invoke_attempt: int, ) -> StreamingChatMessageContent: """Create a streaming chat message content object from a choice.""" metadata = self._get_metadata_from_chat_choice(choice) @@ -234,6 +238,7 @@ def _create_streaming_chat_message_content( role=AuthorRole(choice.delta.role) if choice.delta.role else AuthorRole.ASSISTANT, finish_reason=FinishReason(choice.finish_reason) if choice.finish_reason else None, items=items, + function_invoke_attempt=function_invoke_attempt, ) def _get_metadata_from_response(self, response: ChatCompletionResponse | CompletionChunk) -> dict[str, Any]: diff --git a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py index bfb452d9fc2d..baf2d04f2914 100644 --- a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py @@ -180,6 +180,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, OllamaChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -202,10 +203,10 @@ async def _inner_get_streaming_chat_message_contents( async for part in response_object: if isinstance(part, ChatResponse): - yield [self._create_streaming_chat_message_content_from_chat_response(part)] + yield [self._create_streaming_chat_message_content_from_chat_response(part, function_invoke_attempt)] continue if isinstance(part, Mapping): - yield [self._create_streaming_chat_message_content(part)] + yield [self._create_streaming_chat_message_content(part, function_invoke_attempt)] continue raise ServiceInvalidResponseError( "Invalid response type from Ollama streaming chat completion. 
" @@ -215,7 +216,9 @@ async def _inner_get_streaming_chat_message_contents( # endregion def _create_streaming_chat_message_content_from_chat_response( - self, response: ChatResponse + self, + response: ChatResponse, + function_invoke_attempt: int, ) -> StreamingChatMessageContent: """Create a chat message content from the response.""" items: list[STREAMING_ITEM_TYPES] = [] @@ -235,6 +238,7 @@ def _create_streaming_chat_message_content_from_chat_response( inner_content=response, ai_model_id=self.ai_model_id, metadata=self._get_metadata_from_chat_response(response), + function_invoke_attempt=function_invoke_attempt, ) def _parse_tool_calls(self, tool_calls: Sequence[Message.ToolCall] | None, items: list[Any]): @@ -299,7 +303,9 @@ def _create_chat_message_content(self, response: Mapping[str, Any]) -> ChatMessa metadata=self._get_metadata_from_response(response), ) - def _create_streaming_chat_message_content(self, part: Mapping[str, Any]) -> StreamingChatMessageContent: + def _create_streaming_chat_message_content( + self, part: Mapping[str, Any], function_invoke_attempt: int + ) -> StreamingChatMessageContent: """Create a streaming chat message content from the response part.""" items: list[STREAMING_ITEM_TYPES] = [] if not (message := part.get("message", None)): @@ -331,6 +337,7 @@ def _create_streaming_chat_message_content(self, part: Mapping[str, Any]) -> Str inner_content=part, ai_model_id=self.ai_model_id, metadata=self._get_metadata_from_response(part), + function_invoke_attempt=function_invoke_attempt, ) def _get_metadata_from_response(self, response: Mapping[str, Any]) -> dict[str, Any]: diff --git a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py index bb247cb55e43..28521975e366 100644 --- a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py @@ -109,6 +109,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: """Create streaming chat message contents, in the number specified by the settings. @@ -116,6 +117,7 @@ async def _inner_get_streaming_chat_message_contents( chat_history : A list of chat chat_history, that can be rendered into a set of chat_history, from system, user, assistant and function. settings : Settings for the request. + function_invoke_attempt : The function invoke attempt. Yields: A stream representing the response(s) from the LLM. 
@@ -127,7 +129,7 @@ async def _inner_get_streaming_chat_message_contents( images = self._get_images_from_history(chat_history) async for chunk in self._generate_next_token_async(prompt, settings, images): yield [ - self._create_streaming_chat_message_content(choice_index, new_token) + self._create_streaming_chat_message_content(choice_index, new_token, function_invoke_attempt) for choice_index, new_token in enumerate(chunk) ] @@ -142,12 +144,15 @@ def _create_chat_message_content(self, choice: str) -> ChatMessageContent: ], ) - def _create_streaming_chat_message_content(self, choice_index: int, choice: str) -> StreamingChatMessageContent: + def _create_streaming_chat_message_content( + self, choice_index: int, choice: str, function_invoke_attempt: int + ) -> StreamingChatMessageContent: return StreamingChatMessageContent( role=AuthorRole.ASSISTANT, choice_index=choice_index, content=choice, ai_model_id=self.ai_model_id, + function_invoke_attempt=function_invoke_attempt, ) @override diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py b/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py index 73e1a8fe62b7..03289fd45d58 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py @@ -148,9 +148,10 @@ def _create_streaming_chat_message_content( chunk: ChatCompletionChunk, choice: ChunkChoice, chunk_metadata: dict[str, Any], + function_invoke_attempt: int = 0, ) -> "StreamingChatMessageContent": """Create an Azure streaming chat message content object from a choice.""" - content = super()._create_streaming_chat_message_content(chunk, choice, chunk_metadata) + content = super()._create_streaming_chat_message_content(chunk, choice, chunk_metadata, function_invoke_attempt) assert isinstance(content, StreamingChatMessageContent) and isinstance(choice, ChunkChoice) # nosec return self._add_tool_message_to_chat_message_content(content, choice) diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py index ec918dee605d..0c1e843c5d47 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py @@ -96,6 +96,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, OpenAIChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -126,12 +127,13 @@ async def _inner_get_streaming_chat_message_contents( inner_content=chunk, ai_model_id=settings.ai_model_id, metadata=chunk_metadata, + function_invoke_attempt=function_invoke_attempt, ) for i in range(settings.number_of_responses or 1) ] else: yield [ - self._create_streaming_chat_message_content(chunk, choice, chunk_metadata) + self._create_streaming_chat_message_content(chunk, choice, chunk_metadata, function_invoke_attempt) for choice in chunk.choices ] @@ -190,6 +192,7 @@ def _create_streaming_chat_message_content( chunk: ChatCompletionChunk, choice: ChunkChoice, chunk_metadata: dict[str, Any], + function_invoke_attempt: int, ) -> StreamingChatMessageContent: """Create a streaming 
chat message content object from a choice.""" metadata = self._get_metadata_from_chat_choice(choice) @@ -207,6 +210,7 @@ def _create_streaming_chat_message_content( role=(AuthorRole(choice.delta.role) if choice.delta and choice.delta.role else AuthorRole.ASSISTANT), finish_reason=(FinishReason(choice.finish_reason) if choice.finish_reason else None), items=items, + function_invoke_attempt=function_invoke_attempt, ) def _get_metadata_from_chat_response(self, response: ChatCompletion) -> dict[str, Any]: diff --git a/python/semantic_kernel/contents/streaming_chat_message_content.py b/python/semantic_kernel/contents/streaming_chat_message_content.py index 51110b43ea5c..683b498d0c69 100644 --- a/python/semantic_kernel/contents/streaming_chat_message_content.py +++ b/python/semantic_kernel/contents/streaming_chat_message_content.py @@ -4,6 +4,8 @@ from typing import Any, Union, overload from xml.etree.ElementTree import Element # nosec +from pydantic import Field + from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent @@ -51,6 +53,12 @@ class StreamingChatMessageContent(ChatMessageContent, StreamingContentMixin): __add__: Combines two StreamingChatMessageContent instances. """ + function_invoke_attempt: int | None = Field( + default=0, + description="Tracks the current attempt count for automatically invoking functions. " + "This value increments with each subsequent automatic invocation attempt.", + ) + @overload def __init__( self, @@ -63,6 +71,7 @@ def __init__( finish_reason: FinishReason | None = None, ai_model_id: str | None = None, metadata: dict[str, Any] | None = None, + function_invoke_attempt: int | None = None, ) -> None: ... @overload @@ -77,6 +86,7 @@ def __init__( finish_reason: FinishReason | None = None, ai_model_id: str | None = None, metadata: dict[str, Any] | None = None, + function_invoke_attempt: int | None = None, ) -> None: ... def __init__( # type: ignore @@ -91,26 +101,30 @@ def __init__( # type: ignore finish_reason: FinishReason | None = None, ai_model_id: str | None = None, metadata: dict[str, Any] | None = None, + function_invoke_attempt: int | None = None, ): """Create a new instance of StreamingChatMessageContent. Args: - role: ChatRole - The role of the chat message. - choice_index: int - The index of the choice that generated this response. - items: list[TextContent, FunctionCallContent, FunctionResultContent, ImageContent] - The content. - content: str - The text of the response. - inner_content: Optional[Any] - The inner content of the response, + role: The role of the chat message. + choice_index: The index of the choice that generated this response. + items: The content. + content: The text of the response. + inner_content: The inner content of the response, this should hold all the information from the response so even when not creating a subclass a developer can leverage the full thing. - name: Optional[str] - The name of the response. - encoding: Optional[str] - The encoding of the text. - finish_reason: Optional[FinishReason] - The reason the response was finished. - metadata: Dict[str, Any] - Any metadata that should be attached to the response. - ai_model_id: Optional[str] - The id of the AI model that generated this response. + name: The name of the response. + encoding: The encoding of the text. + finish_reason: The reason the response was finished. 
+ metadata: Any metadata that should be attached to the response. + ai_model_id: The id of the AI model that generated this response. + function_invoke_attempt: Tracks the current attempt count for automatically + invoking functions. This value increments with each subsequent automatic invocation attempt. """ kwargs: dict[str, Any] = { "role": role, "choice_index": choice_index, + "function_invoke_attempt": function_invoke_attempt, } if encoding: kwargs["encoding"] = encoding @@ -180,6 +194,7 @@ def __add__(self, other: "StreamingChatMessageContent") -> "StreamingChatMessage metadata=self.metadata | other.metadata, encoding=self.encoding, finish_reason=self.finish_reason or other.finish_reason, + function_invoke_attempt=self.function_invoke_attempt, ) def to_element(self) -> "Element": @@ -214,5 +229,6 @@ def __hash__(self) -> int: self.encoding, self.finish_reason, self.choice_index, + self.function_invoke_attempt, *self.items, )) diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 697cce70712e..e6a01549f020 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -15,6 +15,9 @@ from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( OpenAIEmbeddingPromptExecutionSettings, ) +from semantic_kernel.connectors.openai_plugin.openai_function_execution_parameters import ( + OpenAIFunctionExecutionParameters, +) from semantic_kernel.data.record_definition.vector_store_model_decorator import vectorstoremodel from semantic_kernel.data.record_definition.vector_store_model_definition import VectorStoreRecordDefinition from semantic_kernel.data.record_definition.vector_store_record_fields import ( @@ -686,3 +689,10 @@ class DataModelClass(BaseModel): key: Annotated[str, VectorStoreRecordKeyField()] return DataModelClass + + +@fixture +def define_openai_predicate_context(): + from semantic_kernel.connectors.openapi_plugin import OperationSelectionPredicateContext # noqa: F401 + + OpenAIFunctionExecutionParameters.model_rebuild() diff --git a/python/tests/samples/test_concepts.py b/python/tests/samples/test_concepts.py index bf3ff42ede2c..e108221d6217 100644 --- a/python/tests/samples/test_concepts.py +++ b/python/tests/samples/test_concepts.py @@ -8,7 +8,9 @@ import pytest from pytest import mark, param -from samples.concepts.auto_function_calling.chat_gpt_api_function_calling import main as chat_gpt_api_function_calling +from samples.concepts.auto_function_calling.chat_completion_with_auto_function_calling import ( + main as chat_completion_with_function_calling, +) from samples.concepts.auto_function_calling.functions_defined_in_json_prompt import ( main as function_defined_in_json_prompt, ) @@ -106,9 +108,9 @@ ), ), param( - chat_gpt_api_function_calling, + chat_completion_with_function_calling, ["What is 3+3?", "exit"], - id="chat_gpt_api_function_calling", + id="chat_completion_with_function_calling", marks=pytest.mark.skipif( os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." 
), diff --git a/python/tests/unit/connectors/openapi_plugin/test_sk_openapi.py b/python/tests/unit/connectors/openapi_plugin/test_sk_openapi.py index 1d25486b5a86..094d57619c53 100644 --- a/python/tests/unit/connectors/openapi_plugin/test_sk_openapi.py +++ b/python/tests/unit/connectors/openapi_plugin/test_sk_openapi.py @@ -733,6 +733,10 @@ async def dummy_auth_callback(**kwargs): @pytest.fixture def openapi_runner_with_predicate_callback(): + from semantic_kernel.connectors.openapi_plugin import OperationSelectionPredicateContext # noqa: F401 + + OpenAPIFunctionExecutionParameters.model_rebuild() + # Define a dummy predicate callback def predicate_callback(context): # Skip operations with DELETE method or containing 'internal' in the path @@ -749,7 +753,7 @@ def predicate_callback(context): return runner, operations, exec_settings -def test_predicate_callback_applied(openapi_runner_with_predicate_callback): +def test_predicate_callback_applied(openapi_runner_with_predicate_callback, define_openai_predicate_context): _, operations, exec_settings = openapi_runner_with_predicate_callback skipped_operations = [] @@ -809,6 +813,10 @@ async def test_run_operation_with_error(mock_request, openapi_runner): def test_invalid_server_url_override(): + from semantic_kernel.connectors.openapi_plugin import OperationSelectionPredicateContext # noqa: F401 + + OpenAPIFunctionExecutionParameters.model_rebuild() + with pytest.raises(ValueError, match="Invalid server_url_override: invalid_url"): params = OpenAPIFunctionExecutionParameters(server_url_override="invalid_url") params.model_post_init(None) diff --git a/python/tests/unit/functions/test_kernel_plugins.py b/python/tests/unit/functions/test_kernel_plugins.py index fd9102f7a5c9..8e487e7022cd 100644 --- a/python/tests/unit/functions/test_kernel_plugins.py +++ b/python/tests/unit/functions/test_kernel_plugins.py @@ -498,7 +498,7 @@ def test_from_object_class(custom_plugin_class): @patch("semantic_kernel.connectors.openai_plugin.openai_utils.OpenAIUtils.parse_openai_manifest_for_openapi_spec_url") -async def test_from_openai_from_file(mock_parse_openai_manifest): +async def test_from_openai_from_file(mock_parse_openai_manifest, define_openai_predicate_context): openai_spec_file = os.path.join(os.path.dirname(__file__), "../../assets/test_plugins") with open(os.path.join(openai_spec_file, "TestOpenAIPlugin", "akv-openai.json")) as file: openai_spec = file.read() @@ -526,7 +526,7 @@ async def test_from_openai_from_file(mock_parse_openai_manifest): @patch("httpx.AsyncClient.get") @patch("semantic_kernel.connectors.openai_plugin.openai_utils.OpenAIUtils.parse_openai_manifest_for_openapi_spec_url") -async def test_from_openai_plugin_from_url(mock_parse_openai_manifest, mock_get): +async def test_from_openai_plugin_from_url(mock_parse_openai_manifest, mock_get, define_openai_predicate_context): openai_spec_file_path = os.path.join( os.path.dirname(__file__), "../../assets/test_plugins", "TestOpenAIPlugin", "akv-openai.json" ) @@ -561,12 +561,12 @@ async def test_from_openai_plugin_from_url(mock_parse_openai_manifest, mock_get) mock_get.assert_awaited_once_with(fake_plugin_url, headers={"User-Agent": HTTP_USER_AGENT}) -async def test_from_openai_fail(): +async def test_from_openai_fail(define_openai_predicate_context): with raises(PluginInitializationError): await KernelPlugin.from_openai(plugin_name="TestOpenAIPlugin") -async def test_from_openai_fail_json_parsing(): +async def test_from_openai_fail_json_parsing(define_openai_predicate_context): with 
raises(PluginInitializationError): await KernelPlugin.from_openai(plugin_name="TestOpenAIPlugin", plugin_str="test") diff --git a/python/tests/unit/kernel/test_kernel.py b/python/tests/unit/kernel/test_kernel.py index 808c69d4fc6e..ef935c030b57 100644 --- a/python/tests/unit/kernel/test_kernel.py +++ b/python/tests/unit/kernel/test_kernel.py @@ -589,7 +589,7 @@ def func2(arg1: str) -> str: @patch("semantic_kernel.connectors.openai_plugin.openai_utils.OpenAIUtils.parse_openai_manifest_for_openapi_spec_url") -async def test_add_plugin_from_openai(mock_parse_openai_manifest, kernel: Kernel): +async def test_add_plugin_from_openai(mock_parse_openai_manifest, kernel: Kernel, define_openai_predicate_context): base_folder = os.path.join(os.path.dirname(__file__), "../../assets/test_plugins") with open(os.path.join(base_folder, "TestOpenAIPlugin", "akv-openai.json")) as file: openai_spec = file.read()
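
A minimal sketch of how a caller might consume the new `function_invoke_attempt` attribute on `StreamingChatMessageContent` during auto function calling. This is not one of the new concept samples from the patch; it assumes the OpenAI connector with model id and credentials supplied via environment variables, and that plugins exposing callable functions are registered separately (omitted here). The service choice and the attempt-grouping logic are illustrative only.

# A minimal sketch, assuming OpenAI settings come from environment variables and
# that plugins exposing callable functions are registered elsewhere.
import asyncio
from collections import defaultdict

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAIChatPromptExecutionSettings
from semantic_kernel.contents import ChatHistory


async def main() -> None:
    kernel = Kernel()
    service = OpenAIChatCompletion(service_id="chat")
    kernel.add_service(service)
    # Register plugins that expose callable functions here (e.g. a math plugin);
    # without any plugins the loop below simply yields chunks for attempt 0.

    settings = OpenAIChatPromptExecutionSettings(
        function_choice_behavior=FunctionChoiceBehavior.Auto(),
    )
    history = ChatHistory()
    history.add_user_message("What is 3+3?")

    # Group streamed chunks by the auto function invocation attempt they belong to.
    chunks_by_attempt: dict[int, list[str]] = defaultdict(list)
    async for messages in service.get_streaming_chat_message_contents(
        chat_history=history, settings=settings, kernel=kernel
    ):
        for msg in messages:
            if msg.content:
                chunks_by_attempt[msg.function_invoke_attempt or 0].append(msg.content)

    # Concatenating chunks per attempt keeps intermediate tool-call rounds separate
    # from the chunks that make up the final answer.
    for attempt, chunks in sorted(chunks_by_attempt.items()):
        print(f"attempt {attempt}: {''.join(chunks)}")


if __name__ == "__main__":
    asyncio.run(main())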