From 69c7a6a61bb41d7bae29a0c6276717638d00a4bc Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Wed, 18 Dec 2024 19:39:16 +0900 Subject: [PATCH] Include a function_invoke_attempt index with Streaming CMC. Update tests and samples. --- .../anthropic_api_function_calling.py | 206 -------------- .../chat_completion_with_function_calling.py | 257 ++++++++++++++++++ .../chat_gpt_api_function_calling.py | 207 -------------- .../chat_mistral_ai_api_function_calling.py | 215 --------------- .../services/anthropic_chat_completion.py | 13 +- .../azure_ai_inference_chat_completion.py | 7 +- .../services/bedrock_chat_completion.py | 8 +- .../ai/chat_completion_client_base.py | 13 +- .../connectors/ai/function_calling_utils.py | 12 +- .../services/google_ai_chat_completion.py | 9 +- .../services/vertex_ai_chat_completion.py | 9 +- .../services/mistral_ai_chat_completion.py | 7 +- .../ollama/services/ollama_chat_completion.py | 15 +- .../services/onnx_gen_ai_chat_completion.py | 9 +- .../open_ai/services/azure_chat_completion.py | 3 +- .../services/open_ai_chat_completion_base.py | 6 +- .../streaming_chat_message_content.py | 36 ++- python/tests/samples/test_concepts.py | 8 +- 18 files changed, 377 insertions(+), 663 deletions(-) delete mode 100644 python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py create mode 100644 python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py delete mode 100644 python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py delete mode 100644 python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py diff --git a/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py b/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py deleted file mode 100644 index 5769943157db..000000000000 --- a/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -import os -from functools import reduce -from typing import TYPE_CHECKING - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.anthropic import AnthropicChatCompletion, AnthropicChatPromptExecutionSettings -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior -from semantic_kernel.contents import ChatHistory -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.function_call_content import FunctionCallContent -from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.core_plugins.math_plugin import MathPlugin -from semantic_kernel.core_plugins.time_plugin import TimePlugin -from semantic_kernel.functions import KernelArguments - -if TYPE_CHECKING: - from semantic_kernel.functions import KernelFunction - - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. You are also a math wizard, -especially for adding and subtracting. -You also excel at joke telling, where your tone is often sarcastic. -Once you have the answer I am looking for, -you will return a full answer to me as soon as possible. -""" - -# This concept example shows how to handle both streaming and non-streaming responses -# To toggle the behavior, set the following flag accordingly: -stream = False - -kernel = Kernel() - -# Note: the underlying model needs to support function calling. -# https://docs.anthropic.com/en/docs/build-with-claude/tool-use#choosing-a-model -kernel.add_service(AnthropicChatCompletion(service_id="chat", ai_model_id="claude-3-opus-20240229")) - -plugins_directory = os.path.join(__file__, "../../../../../prompt_template_samples/") -# adding plugins to the kernel -kernel.add_plugin(MathPlugin(), plugin_name="math") -kernel.add_plugin(TimePlugin(), plugin_name="time") - -chat_function = kernel.add_function( - prompt="{{$chat_history}}{{$user_input}}", - plugin_name="ChatBot", - function_name="Chat", -) - -# Enabling or disabling function calling is done by setting the `function_choice_behavior` attribute for the -# prompt execution settings. When the function_call parameter is set to "auto" the model will decide which -# function to use, if any. -# -# There are two ways to define the `function_choice_behavior` parameter: -# 1. Using the type string as `"auto"` or `"required"`. For example: -# configure `function_choice_behavior="auto"` parameter directly in the execution settings. -# 2. Using the FunctionChoiceBehavior class. For example: -# `function_choice_behavior=FunctionChoiceBehavior.Auto()`. -# Both of these configure the `auto` tool_choice and all of the available plugins/functions -# registered on the kernel. If you want to limit the available plugins/functions, you must -# configure the `filters` dictionary attribute for each type of function choice behavior. -# For example: -# -# from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior - -# function_choice_behavior = FunctionChoiceBehavior.Auto( -# filters={"included_functions": ["time-date", "time-time", "math-Add"]} -# ) -# -# The filters attribute allows you to specify either: `included_functions`, `excluded_functions`, -# `included_plugins`, or `excluded_plugins`. - -execution_settings = AnthropicChatPromptExecutionSettings( - service_id="chat", - max_tokens=2000, - temperature=0.7, - top_p=0.8, - function_choice_behavior=FunctionChoiceBehavior.Auto(auto_invoke=True), -) - -history = ChatHistory() - -history.add_system_message(system_message) -history.add_user_message("Hi there, who are you?") -history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") - -arguments = KernelArguments(settings=execution_settings) - - -def print_tool_calls(message: ChatMessageContent) -> None: - # A helper method to pretty print the tool calls from the message. - # This is only triggered if auto invoke tool calls is disabled. - items = message.items - formatted_tool_calls = [] - for i, item in enumerate(items, start=1): - if isinstance(item, FunctionCallContent): - tool_call_id = item.id - function_name = item.name - function_arguments = item.arguments - formatted_str = ( - f"tool_call {i} id: {tool_call_id}\n" - f"tool_call {i} function name: {function_name}\n" - f"tool_call {i} arguments: {function_arguments}" - ) - formatted_tool_calls.append(formatted_str) - if len(formatted_tool_calls) > 0: - print("Tool calls:\n" + "\n\n".join(formatted_tool_calls)) - else: - print("The model used its own knowledge and didn't return any tool calls.") - - -async def handle_streaming( - kernel: Kernel, - chat_function: "KernelFunction", - arguments: KernelArguments, -) -> str | None: - response = kernel.invoke_stream( - chat_function, - return_function_results=False, - arguments=arguments, - ) - - print("Mosscap:> ", end="") - streamed_chunks: list[StreamingChatMessageContent] = [] - result_content = [] - async for message in response: - if ( - not execution_settings.function_choice_behavior.auto_invoke_kernel_functions - and isinstance(message[0], StreamingChatMessageContent) - and message[0].role == AuthorRole.ASSISTANT - ): - streamed_chunks.append(message[0]) - elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: - result_content.append(message[0]) - print(str(message[0]), end="") - - if streamed_chunks: - streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) - if hasattr(streaming_chat_message, "content"): - print(streaming_chat_message.content) - print("Auto tool calls is disabled, printing returned tool calls...") - print_tool_calls(streaming_chat_message) - - print("\n") - if result_content: - return "".join([str(content) for content in result_content]) - return None - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - arguments["user_input"] = user_input - arguments["chat_history"] = history - - if stream: - result = await handle_streaming(kernel, chat_function, arguments=arguments) - else: - result = await kernel.invoke(chat_function, arguments=arguments) - - # If tools are used, and auto invoke tool calls is False, the response will be of type - # ChatMessageContent with information about the tool calls, which need to be sent - # back to the model to get the final response. - function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)] - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and len(function_calls) > 0: - print_tool_calls(result.value[0]) - return True - - print(f"Mosscap:> {result}") - - history.add_user_message(user_input) - history.add_assistant_message(str(result)) - return True - - -async def main() -> None: - chatting = True - print( - "Welcome to the chat bot!\ - \n Type 'exit' to exit.\ - \n Try a math question to see the function calling in action (i.e. what is 3+3?)." - ) - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py b/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py new file mode 100644 index 000000000000..445106e222cb --- /dev/null +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py @@ -0,0 +1,257 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio +from functools import reduce +from typing import TYPE_CHECKING + +from samples.concepts.setup.chat_completion_services import Services, get_chat_completion_service_and_request_settings +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.contents import ChatHistory +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.function_call_content import FunctionCallContent +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole +from semantic_kernel.core_plugins.math_plugin import MathPlugin +from semantic_kernel.core_plugins.time_plugin import TimePlugin +from semantic_kernel.functions import KernelArguments + +if TYPE_CHECKING: + from semantic_kernel.functions import KernelFunction + +# System message defining the behavior and persona of the chat bot. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. You are also a math wizard, +especially for adding and subtracting. +You also excel at joke telling, where your tone is often sarcastic. +Once you have the answer I am looking for, +you will return a full answer to me as soon as possible. +""" + +# Toggle this flag to switch between streaming and non-streaming modes. +stream = True + +# Create and configure the kernel. +kernel = Kernel() + +# Load some sample plugins (for demonstration of function calling). +kernel.add_plugin(MathPlugin(), plugin_name="math") +kernel.add_plugin(TimePlugin(), plugin_name="time") + +# Define a chat function (a template for how to handle user input). +chat_function = kernel.add_function( + prompt="{{$chat_history}}{{$user_input}}", + plugin_name="ChatBot", + function_name="Chat", +) + +# You can select from the following chat completion services that support function calling: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# Configure the function choice behavior. Here, we set it to Auto with auto_invoke=True. +# - If `auto_invoke=True`, the model will automatically choose and call functions as needed. +# - If `auto_invoke=False`, the model may return tool call instructions that you must handle and call manually. +request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=True) + +kernel.add_service(chat_completion_service) + +# Pass the request settings to the kernel arguments. +arguments = KernelArguments(settings=request_settings) + +# Create a chat history to store the system message, initial messages, and the conversation. +history = ChatHistory() +history.add_system_message(system_message) +history.add_user_message("Hi there, who are you?") +history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") + + +def print_tool_calls(message: ChatMessageContent) -> None: + """ + A helper function to pretty print the tool calls found in a ChatMessageContent message. + This is useful when auto tool invocation is disabled and the model returns calls that you must handle. + """ + items = message.items + formatted_tool_calls = [] + for i, item in enumerate(items, start=1): + if isinstance(item, FunctionCallContent): + tool_call_id = item.id + function_name = item.name + function_arguments = item.arguments + formatted_str = ( + f"tool_call {i} id: {tool_call_id}\n" + f"tool_call {i} function name: {function_name}\n" + f"tool_call {i} arguments: {function_arguments}" + ) + formatted_tool_calls.append(formatted_str) + if len(formatted_tool_calls) > 0: + print("\n[Tool calls returned by the model]:\n" + "\n\n".join(formatted_tool_calls)) + else: + print("\n[No tool calls returned by the model]") + + +async def handle_streaming( + kernel: Kernel, + chat_function: "KernelFunction", + arguments: KernelArguments, +) -> str | None: + """ + Handle the streaming response from the model. + This function demonstrates two possible paths: + + 1. When auto function calling is ON (auto_invoke=True): + - The model may call tools automatically and produce a continuous + stream of assistant messages. We can simply print these as they come in. + + 2. When auto function calling is OFF (auto_invoke=False): + - The model may instead return tool call instructions embedded in the stream. + We can track these calls using `function_invoke_attempt` attributes and print + them for the user. The user can then manually invoke the tools and return the results + to the model for further completion. + """ + + response = kernel.invoke_stream( + chat_function, + return_function_results=False, + arguments=arguments, + ) + + # We will differentiate behavior based on whether auto invoking kernel functions is enabled. + auto_invoking = request_settings.function_choice_behavior.auto_invoke_kernel_functions + + print("Mosscap:> ", end="", flush=True) + + # If auto_invoking is False, the model may return separate streaming chunks containing tool instructions. + # We'll store them here. + streamed_tool_chunks: list[StreamingChatMessageContent] = [] + + # For content messages (the final assistant's response text), store them here. + streamed_response_chunks: list[StreamingChatMessageContent] = [] + + async for message in response: + msg = message[0] + + # We only expect assistant messages here. + if not isinstance(msg, StreamingChatMessageContent) or msg.role != AuthorRole.ASSISTANT: + continue + + if auto_invoking: + # When auto invocation is ON, no special handling is needed. Just print out messages as they arrive. + streamed_response_chunks.append(msg) + print(str(msg), end="", flush=True) + else: + # When auto invocation is OFF, the model may send chunks that represent tool calls. + # Chunks that contain function call instructions will have a function_invoke_attempt attribute. + if hasattr(msg, "function_invoke_attempt"): + # This chunk is part of a tool call instruction sequence + streamed_tool_chunks.append(msg) + else: + # This chunk is normal assistant response text + streamed_response_chunks.append(msg) + print(str(msg), end="", flush=True) + + print("\n", flush=True) + + # If auto function calling was OFF, handle any tool call instructions we captured. + if not auto_invoking and streamed_tool_chunks: + # Group streamed chunks by `function_invoke_attempt` to handle each invocation attempt separately. + grouped_chunks = {} + for chunk in streamed_tool_chunks: + key = getattr(chunk, "function_invoke_attempt", None) + if key is not None: + grouped_chunks.setdefault(key, []).append(chunk) + + # Process each group of chunks + for attempt, chunks in grouped_chunks.items(): + try: + # Combine all chunks for a given attempt into one message. + combined_content = reduce(lambda first, second: first + second, chunks) + if hasattr(combined_content, "content"): + print(f"[function_invoke_attempt {attempt} content]:\n{combined_content.content}") + + print("[Auto function calling is OFF] Here are the returned tool calls:") + print_tool_calls(combined_content) + except Exception as e: + print(f"Error processing chunks for function_invoke_attempt {attempt}: {e}") + + # Return the final concatenated assistant response (if any). + if streamed_response_chunks: + return "".join([str(content) for content in streamed_response_chunks]) + return None + + +async def chat() -> bool: + """ + Continuously prompt the user for input and show the assistant's response. + Type 'exit' to exit. + """ + try: + user_input = input("User:> ") + except (KeyboardInterrupt, EOFError): + print("\n\nExiting chat...") + return False + + if user_input.lower().strip() == "exit": + print("\n\nExiting chat...") + return False + + arguments["user_input"] = user_input + arguments["chat_history"] = history + + if stream: + # Handle streaming responses + result = await handle_streaming(kernel, chat_function, arguments=arguments) + else: + # Handle non-streaming responses + result = await kernel.invoke(chat_function, arguments=arguments) + + # If function calls are returned and auto invoking is off, we must show them. + if not request_settings.function_choice_behavior.auto_invoke_kernel_functions and result and result.value: + # Extract function calls from the returned content + function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)] + if len(function_calls) > 0: + print_tool_calls(result.value[0]) + # At this point, you'd handle these calls manually if desired. + # For now, we just print them. + return True + + # If no function calls to handle, just print the assistant's response + if result: + print(f"Mosscap:> {result}") + + # Update the chat history with the user's input and the assistant's response + if result: + history.add_user_message(user_input) + history.add_assistant_message(str(result)) + + return True + + +async def main() -> None: + print( + "Welcome to the chat bot!\n" + " Type 'exit' to exit.\n" + " Try a math question to see function calling in action (e.g. 'what is 3+3?')." + ) + chatting = True + while chatting: + chatting = await chat() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py b/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py deleted file mode 100644 index 2ced79d2f8be..000000000000 --- a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -import os -from functools import reduce -from typing import TYPE_CHECKING - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior -from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAIChatPromptExecutionSettings -from semantic_kernel.contents import ChatHistory -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.function_call_content import FunctionCallContent -from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.core_plugins.math_plugin import MathPlugin -from semantic_kernel.core_plugins.time_plugin import TimePlugin -from semantic_kernel.functions import KernelArguments - -if TYPE_CHECKING: - from semantic_kernel.functions import KernelFunction - - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. You are also a math wizard, -especially for adding and subtracting. -You also excel at joke telling, where your tone is often sarcastic. -Once you have the answer I am looking for, -you will return a full answer to me as soon as possible. -""" - -# This concept example shows how to handle both streaming and non-streaming responses -# To toggle the behavior, set the following flag accordingly: -stream = True - -kernel = Kernel() - -# Note: the underlying gpt-35/gpt-4 model version needs to be at least version 0613 to support tools. -kernel.add_service(OpenAIChatCompletion(service_id="chat")) - -plugins_directory = os.path.join(__file__, "../../../../../prompt_template_samples/") -# adding plugins to the kernel -kernel.add_plugin(MathPlugin(), plugin_name="math") -kernel.add_plugin(TimePlugin(), plugin_name="time") - -chat_function = kernel.add_function( - prompt="{{$chat_history}}{{$user_input}}", - plugin_name="ChatBot", - function_name="Chat", -) - -# Enabling or disabling function calling is done by setting the `function_choice_behavior` attribute for the -# prompt execution settings. When the function_call parameter is set to "auto" the model will decide which -# function to use, if any. -# -# There are two ways to define the `function_choice_behavior` parameter: -# 1. Using the type string as `"auto"`, `"required"`, or `"none"`. For example: -# configure `function_choice_behavior="auto"` parameter directly in the execution settings. -# 2. Using the FunctionChoiceBehavior class. For example: -# `function_choice_behavior=FunctionChoiceBehavior.Auto()`. -# Both of these configure the `auto` tool_choice and all of the available plugins/functions -# registered on the kernel. If you want to limit the available plugins/functions, you must -# configure the `filters` dictionary attribute for each type of function choice behavior. -# For example: -# -# from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior - -# function_choice_behavior = FunctionChoiceBehavior.Auto( -# filters={"included_functions": ["time-date", "time-time", "math-Add"]} -# ) -# -# The filters attribute allows you to specify either: `included_functions`, `excluded_functions`, -# `included_plugins`, or `excluded_plugins`. - -# Note: the number of responses for auto invoking tool calls is limited to 1. -# If configured to be greater than one, this value will be overridden to 1. -execution_settings = OpenAIChatPromptExecutionSettings( - service_id="chat", - max_tokens=2000, - temperature=0.7, - top_p=0.8, - function_choice_behavior=FunctionChoiceBehavior.Auto(auto_invoke=True), -) - -history = ChatHistory() - -history.add_system_message(system_message) -history.add_user_message("Hi there, who are you?") -history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") - -arguments = KernelArguments(settings=execution_settings) - - -def print_tool_calls(message: ChatMessageContent) -> None: - # A helper method to pretty print the tool calls from the message. - # This is only triggered if auto invoke tool calls is disabled. - items = message.items - formatted_tool_calls = [] - for i, item in enumerate(items, start=1): - if isinstance(item, FunctionCallContent): - tool_call_id = item.id - function_name = item.name - function_arguments = item.arguments - formatted_str = ( - f"tool_call {i} id: {tool_call_id}\n" - f"tool_call {i} function name: {function_name}\n" - f"tool_call {i} arguments: {function_arguments}" - ) - formatted_tool_calls.append(formatted_str) - if len(formatted_tool_calls) > 0: - print("Tool calls:\n" + "\n\n".join(formatted_tool_calls)) - else: - print("The model used its own knowledge and didn't return any tool calls.") - - -async def handle_streaming( - kernel: Kernel, - chat_function: "KernelFunction", - arguments: KernelArguments, -) -> str | None: - response = kernel.invoke_stream( - chat_function, - return_function_results=False, - arguments=arguments, - ) - - print("Mosscap:> ", end="") - streamed_chunks: list[StreamingChatMessageContent] = [] - result_content: list[StreamingChatMessageContent] = [] - async for message in response: - if ( - not execution_settings.function_choice_behavior.auto_invoke_kernel_functions - and isinstance(message[0], StreamingChatMessageContent) - and message[0].role == AuthorRole.ASSISTANT - ): - streamed_chunks.append(message[0]) - elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: - result_content.append(message[0]) - print(str(message[0]), end="") - - if streamed_chunks: - streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) - if hasattr(streaming_chat_message, "content"): - print(streaming_chat_message.content) - print("Auto tool calls is disabled, printing returned tool calls...") - print_tool_calls(streaming_chat_message) - - print("\n") - if result_content: - return "".join([str(content) for content in result_content]) - return None - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - arguments["user_input"] = user_input - arguments["chat_history"] = history - - if stream: - result = await handle_streaming(kernel, chat_function, arguments=arguments) - else: - result = await kernel.invoke(chat_function, arguments=arguments) - - # If tools are used, and auto invoke tool calls is False, the response will be of type - # ChatMessageContent with information about the tool calls, which need to be sent - # back to the model to get the final response. - function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)] - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and len(function_calls) > 0: - print_tool_calls(result.value[0]) - return True - - print(f"Mosscap:> {result}") - - history.add_user_message(user_input) - history.add_assistant_message(str(result)) - return True - - -async def main() -> None: - chatting = True - print( - "Welcome to the chat bot!\ - \n Type 'exit' to exit.\ - \n Try a math question to see the function calling in action (i.e. what is 3+3?)." - ) - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py b/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py deleted file mode 100644 index 5ee05a835e2a..000000000000 --- a/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py +++ /dev/null @@ -1,215 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -import os -from functools import reduce -from typing import TYPE_CHECKING - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior -from semantic_kernel.connectors.ai.mistral_ai import MistralAIChatCompletion, MistralAIChatPromptExecutionSettings -from semantic_kernel.contents import ChatHistory -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.function_call_content import FunctionCallContent -from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.core_plugins.math_plugin import MathPlugin -from semantic_kernel.core_plugins.time_plugin import TimePlugin -from semantic_kernel.functions import KernelArguments - -if TYPE_CHECKING: - from semantic_kernel.functions import KernelFunction - - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. You are also a math wizard, -especially for adding and subtracting. -You also excel at joke telling, where your tone is often sarcastic. -Once you have the answer I am looking for, -you will return a full answer to me as soon as possible. -""" - -# This concept example shows how to handle both streaming and non-streaming responses -# To toggle the behavior, set the following flag accordingly: -stream = True - -kernel = Kernel() - -# Note: the underlying Model must be Mistral Small, Mistral Large, Mixtral 8x22B, Mistral Nemo. -# You can use MISTRALAI_API_KEY and MISTRALAI_CHAT_MODEL_ID environment variables to set the API key and model ID. -# Or just set it here in the Constructor for testing -kernel.add_service( - MistralAIChatCompletion( - service_id="chat", - # api_key=XXXXXXX, - # ai_model_id="mistral-large", - ) -) - -plugins_directory = os.path.join(__file__, "../../../../../prompt_template_samples/") -# adding plugins to the kernel -kernel.add_plugin(MathPlugin(), plugin_name="math") -kernel.add_plugin(TimePlugin(), plugin_name="time") - -chat_function = kernel.add_function( - prompt="{{$chat_history}}{{$user_input}}", - plugin_name="ChatBot", - function_name="Chat", -) - -# Enabling or disabling function calling is done by setting the `function_choice_behavior` attribute for the -# prompt execution settings. When the function_call parameter is set to "auto" the model will decide which -# function to use, if any. -# -# There are two ways to define the `function_choice_behavior` parameter: -# 1. Using the type string as `"auto"`, `"required"`, or `"none"`. For example: -# configure `function_choice_behavior="auto"` parameter directly in the execution settings. -# 2. Using the FunctionChoiceBehavior class. For example: -# `function_choice_behavior=FunctionChoiceBehavior.Auto()`. -# Both of these configure the `auto` tool_choice and all of the available plugins/functions -# registered on the kernel. If you want to limit the available plugins/functions, you must -# configure the `filters` dictionary attribute for each type of function choice behavior. -# For example: -# -# from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior - -# function_choice_behavior = FunctionChoiceBehavior.Auto( -# filters={"included_functions": ["time-date", "time-time", "math-Add"]} -# ) -# -# The filters attribute allows you to specify either: `included_functions`, `excluded_functions`, -# `included_plugins`, or `excluded_plugins`. - -# Note: the number of responses for auto invoking tool calls is limited to 1. -# If configured to be greater than one, this value will be overridden to 1. -execution_settings = MistralAIChatPromptExecutionSettings( - service_id="chat", - max_tokens=2000, - temperature=0.7, - top_p=0.8, - function_choice_behavior=FunctionChoiceBehavior.Auto(auto_invoke=True), -) - -history = ChatHistory() - -history.add_system_message(system_message) -history.add_user_message("Hi there, who are you?") -history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") - -arguments = KernelArguments(settings=execution_settings) - - -def print_tool_calls(message: ChatMessageContent) -> None: - # A helper method to pretty print the tool calls from the message. - # This is only triggered if auto invoke tool calls is disabled. - items = message.items - formatted_tool_calls = [] - for i, item in enumerate(items, start=1): - if isinstance(item, FunctionCallContent): - tool_call_id = item.id - function_name = item.name - function_arguments = item.arguments - formatted_str = ( - f"tool_call {i} id: {tool_call_id}\n" - f"tool_call {i} function name: {function_name}\n" - f"tool_call {i} arguments: {function_arguments}" - ) - formatted_tool_calls.append(formatted_str) - if len(formatted_tool_calls) > 0: - print("Tool calls:\n" + "\n\n".join(formatted_tool_calls)) - else: - print("The model used its own knowledge and didn't return any tool calls.") - - -async def handle_streaming( - kernel: Kernel, - chat_function: "KernelFunction", - arguments: KernelArguments, -) -> str | None: - response = kernel.invoke_stream( - chat_function, - return_function_results=False, - arguments=arguments, - ) - - print("Mosscap:> ", end="") - streamed_chunks: list[StreamingChatMessageContent] = [] - result_content = [] - async for message in response: - if ( - not execution_settings.function_choice_behavior.auto_invoke_kernel_functions - and isinstance(message[0], StreamingChatMessageContent) - and message[0].role == AuthorRole.ASSISTANT - ): - streamed_chunks.append(message[0]) - elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: - result_content.append(message[0]) - print(str(message[0]), end="") - - if streamed_chunks: - streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) - if hasattr(streaming_chat_message, "content"): - print(streaming_chat_message.content) - print("Auto tool calls is disabled, printing returned tool calls...") - print_tool_calls(streaming_chat_message) - - print("\n") - if result_content: - return "".join([str(content) for content in result_content]) - return None - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - arguments["user_input"] = user_input - arguments["chat_history"] = history - - if stream: - result = await handle_streaming(kernel, chat_function, arguments=arguments) - else: - result = await kernel.invoke(chat_function, arguments=arguments) - - # If tools are used, and auto invoke tool calls is False, the response will be of type - # ChatMessageContent with information about the tool calls, which need to be sent - # back to the model to get the final response. - function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)] - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and len(function_calls) > 0: - print_tool_calls(result.value[0]) - return True - - print(f"Mosscap:> {result}") - - history.add_user_message(user_input) - history.add_assistant_message(str(result)) - return True - - -async def main() -> None: - chatting = True - print( - "Welcome to the chat bot!\ - \n Type 'exit' to exit.\ - \n Try a math question to see the function calling in action (i.e. what is 3+3?)." - ) - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py b/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py index ed2616ba71aa..f8490edba2cd 100644 --- a/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py @@ -154,6 +154,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, AnthropicChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -164,7 +165,7 @@ async def _inner_get_streaming_chat_message_contents( if settings.system is None and parsed_system_message is not None: settings.system = parsed_system_message - response = self._send_chat_stream_request(settings) + response = self._send_chat_stream_request(settings, function_invoke_attempt) if not isinstance(response, AsyncGenerator): raise ServiceInvalidResponseError("Expected an AsyncGenerator response.") @@ -242,6 +243,7 @@ def _create_streaming_chat_message_content( self, stream_event: TextEvent | ContentBlockStopEvent | RawMessageDeltaEvent, metadata: dict[str, Any] = {}, + function_invoke_attempt: int = 0, ) -> StreamingChatMessageContent: """Create a streaming chat message content object from a content block.""" items: list[STREAMING_ITEM_TYPES] = [] @@ -275,6 +277,7 @@ def _create_streaming_chat_message_content( role=AuthorRole.ASSISTANT, finish_reason=finish_reason, items=items, + function_invoke_attempt=function_invoke_attempt, ) def update_settings_from_function_call_configuration_anthropic( @@ -338,7 +341,9 @@ async def _send_chat_request(self, settings: AnthropicChatPromptExecutionSetting return [self._create_chat_message_content(response, response_metadata)] async def _send_chat_stream_request( - self, settings: AnthropicChatPromptExecutionSettings + self, + settings: AnthropicChatPromptExecutionSettings, + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], None]: """Send the chat stream request. @@ -359,7 +364,9 @@ async def _send_chat_stream_request( isinstance(stream_event, ContentBlockStopEvent) and stream_event.content_block.type == "tool_use" ): - yield [self._create_streaming_chat_message_content(stream_event, metadata)] + yield [ + self._create_streaming_chat_message_content(stream_event, metadata, function_invoke_attempt) + ] except Exception as ex: raise ServiceResponseException( f"{type(self)} service failed to complete the request", diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py index e48268d223bb..8ac10561f142 100644 --- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py @@ -138,6 +138,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, AzureAIInferenceChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -157,7 +158,8 @@ async def _inner_get_streaming_chat_message_contents( continue chunk_metadata = self._get_metadata_from_response(chunk) yield [ - self._create_streaming_chat_message_content(chunk, choice, chunk_metadata) for choice in chunk.choices + self._create_streaming_chat_message_content(chunk, choice, chunk_metadata, function_invoke_attempt) + for choice in chunk.choices ] @override @@ -255,6 +257,7 @@ def _create_streaming_chat_message_content( chunk: AsyncStreamingChatCompletions, choice: StreamingChatChoiceUpdate, metadata: dict[str, Any], + function_invoke_attempt: int, ) -> StreamingChatMessageContent: """Create a streaming chat message content object. @@ -262,6 +265,7 @@ def _create_streaming_chat_message_content( chunk: The chunk from the response. choice: The choice from the response. metadata: The metadata from the response. + function_invoke_attempt: The function invoke attempt. Returns: A streaming chat message content object. @@ -295,6 +299,7 @@ def _create_streaming_chat_message_content( inner_content=chunk, finish_reason=FinishReason(choice.finish_reason) if choice.finish_reason else None, metadata=metadata, + function_invoke_attempt=function_invoke_attempt, ) # endregion diff --git a/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py b/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py index 8827b310ac0d..c163b6ffda74 100644 --- a/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py @@ -128,6 +128,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: # Not all models support streaming: check if the model supports streaming before proceeding model_info = await self.get_foundation_model_info(self.ai_model_id) @@ -146,7 +147,7 @@ async def _inner_get_streaming_chat_message_contents( elif "contentBlockStart" in event: yield [self._parse_content_block_start_event(event)] elif "contentBlockDelta" in event: - yield [self._parse_content_block_delta_event(event)] + yield [self._parse_content_block_delta_event(event, function_invoke_attempt)] elif "contentBlockStop" in event: continue elif "messageStop" in event: @@ -338,7 +339,9 @@ def _parse_content_block_start_event(self, event: dict[str, Any]) -> StreamingCh inner_content=event, ) - def _parse_content_block_delta_event(self, event: dict[str, Any]) -> StreamingChatMessageContent: + def _parse_content_block_delta_event( + self, event: dict[str, Any], function_invoke_attempt: int + ) -> StreamingChatMessageContent: """Parse the content block delta event. The content block delta event contains the completion. @@ -363,6 +366,7 @@ def _parse_content_block_delta_event(self, event: dict[str, Any]) -> StreamingCh items=items, choice_index=0, inner_content=event, + function_invoke_attempt=function_invoke_attempt, ) def _parse_message_stop_event(self, event: dict[str, Any]) -> StreamingChatMessageContent: diff --git a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py index 6a673dccd5eb..de9edf36c268 100644 --- a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py +++ b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py @@ -64,15 +64,17 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: """Send a streaming chat request to the AI service. Args: - chat_history (ChatHistory): The chat history to send. - settings (PromptExecutionSettings): The settings for the request. + chat_history: The chat history to send. + settings: The settings for the request. + function_invoke_attempt: The current attempt count for automatically invoking functions. Yields: - streaming_chat_message_contents (list[StreamingChatMessageContent]): The streaming chat message contents. + streaming_chat_message_contents: The streaming chat message contents. """ raise NotImplementedError("The _inner_get_streaming_chat_message_contents method is not implemented.") # Below is needed for mypy: https://mypy.readthedocs.io/en/stable/more_types.html#asynchronous-iterators @@ -268,7 +270,9 @@ async def get_streaming_chat_message_contents( # Hold the messages, if there are more than one response, it will not be used, so we flatten all_messages: list["StreamingChatMessageContent"] = [] function_call_returned = False - async for messages in self._inner_get_streaming_chat_message_contents(chat_history, settings): + async for messages in self._inner_get_streaming_chat_message_contents( + chat_history, settings, request_index + ): for msg in messages: if msg is not None: all_messages.append(msg) @@ -313,6 +317,7 @@ async def get_streaming_chat_message_contents( function_result_messages = merge_streaming_function_results( messages=chat_history.messages[-len(results) :], ai_model_id=ai_model_id, # type: ignore + function_invoke_attempt=request_index, ) if self._yield_function_result_messages(function_result_messages): yield function_result_messages diff --git a/python/semantic_kernel/connectors/ai/function_calling_utils.py b/python/semantic_kernel/connectors/ai/function_calling_utils.py index 365d43565ed9..c7ab3dba6b39 100644 --- a/python/semantic_kernel/connectors/ai/function_calling_utils.py +++ b/python/semantic_kernel/connectors/ai/function_calling_utils.py @@ -101,6 +101,7 @@ def merge_function_results( def merge_streaming_function_results( messages: list[ChatMessageContent | StreamingChatMessageContent], ai_model_id: str, + function_invoke_attempt: int, ) -> list[StreamingChatMessageContent]: """Combine multiple streaming function result content types to one streaming chat message content type. @@ -110,6 +111,7 @@ def merge_streaming_function_results( Args: messages: The list of streaming chat message content types. ai_model_id: The AI model ID. + function_invoke_attempt: The function invoke attempt. Returns: The combined streaming chat message content type. @@ -118,4 +120,12 @@ def merge_streaming_function_results( for message in messages: items.extend([item for item in message.items if isinstance(item, FunctionResultContent)]) - return [StreamingChatMessageContent(role=AuthorRole.TOOL, items=items, choice_index=0, ai_model_id=ai_model_id)] + return [ + StreamingChatMessageContent( + role=AuthorRole.TOOL, + items=items, + choice_index=0, + ai_model_id=ai_model_id, + function_invoke_attempt=function_invoke_attempt, + ) + ] diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py index ca4ca998a122..df8f64cf4c6c 100644 --- a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py @@ -147,6 +147,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, GoogleAIChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -167,7 +168,10 @@ async def _inner_get_streaming_chat_message_contents( ) async for chunk in response: - yield [self._create_streaming_chat_message_content(chunk, candidate) for candidate in chunk.candidates] + yield [ + self._create_streaming_chat_message_content(chunk, candidate, function_invoke_attempt) + for candidate in chunk.candidates + ] @override def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") -> None: @@ -268,12 +272,14 @@ def _create_streaming_chat_message_content( self, chunk: GenerateContentResponse, candidate: Candidate, + function_invoke_attempt: int = 0, ) -> StreamingChatMessageContent: """Create a streaming chat message content object. Args: chunk: The response from the service. candidate: The candidate from the response. + function_invoke_attempt: The function invoke attempt. Returns: A streaming chat message content object. @@ -313,6 +319,7 @@ def _create_streaming_chat_message_content( inner_content=chunk, finish_reason=finish_reason, metadata=response_metadata, + function_invoke_attempt=function_invoke_attempt, ) # endregion diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py index 45d66396ff34..6372c71c5b1c 100644 --- a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py @@ -142,6 +142,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, VertexAIChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -162,7 +163,10 @@ async def _inner_get_streaming_chat_message_contents( ) async for chunk in response: - yield [self._create_streaming_chat_message_content(chunk, candidate) for candidate in chunk.candidates] + yield [ + self._create_streaming_chat_message_content(chunk, candidate, function_invoke_attempt) + for candidate in chunk.candidates + ] @override def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") -> None: @@ -262,12 +266,14 @@ def _create_streaming_chat_message_content( self, chunk: GenerationResponse, candidate: Candidate, + function_invoke_attempt: int, ) -> StreamingChatMessageContent: """Create a streaming chat message content object. Args: chunk: The response from the service. candidate: The candidate from the response. + function_invoke_attempt: The function invoke attempt. Returns: A streaming chat message content object. @@ -308,6 +314,7 @@ def _create_streaming_chat_message_content( inner_content=chunk, finish_reason=finish_reason, metadata=response_metadata, + function_invoke_attempt=function_invoke_attempt, ) # endregion diff --git a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py index 46f0c9f64a2b..b374235225a4 100644 --- a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py @@ -159,6 +159,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, MistralAIChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -182,7 +183,9 @@ async def _inner_get_streaming_chat_message_contents( continue chunk_metadata = self._get_metadata_from_response(chunk.data) yield [ - self._create_streaming_chat_message_content(chunk.data, choice, chunk_metadata) + self._create_streaming_chat_message_content( + chunk.data, choice, chunk_metadata, function_invoke_attempt + ) for choice in chunk.data.choices ] @@ -216,6 +219,7 @@ def _create_streaming_chat_message_content( chunk: CompletionChunk, choice: CompletionResponseStreamChoice, chunk_metadata: dict[str, Any], + function_invoke_attempt: int, ) -> StreamingChatMessageContent: """Create a streaming chat message content object from a choice.""" metadata = self._get_metadata_from_chat_choice(choice) @@ -234,6 +238,7 @@ def _create_streaming_chat_message_content( role=AuthorRole(choice.delta.role) if choice.delta.role else AuthorRole.ASSISTANT, finish_reason=FinishReason(choice.finish_reason) if choice.finish_reason else None, items=items, + function_invoke_attempt=function_invoke_attempt, ) def _get_metadata_from_response(self, response: ChatCompletionResponse | CompletionChunk) -> dict[str, Any]: diff --git a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py index bfb452d9fc2d..baf2d04f2914 100644 --- a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py @@ -180,6 +180,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, OllamaChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -202,10 +203,10 @@ async def _inner_get_streaming_chat_message_contents( async for part in response_object: if isinstance(part, ChatResponse): - yield [self._create_streaming_chat_message_content_from_chat_response(part)] + yield [self._create_streaming_chat_message_content_from_chat_response(part, function_invoke_attempt)] continue if isinstance(part, Mapping): - yield [self._create_streaming_chat_message_content(part)] + yield [self._create_streaming_chat_message_content(part, function_invoke_attempt)] continue raise ServiceInvalidResponseError( "Invalid response type from Ollama streaming chat completion. " @@ -215,7 +216,9 @@ async def _inner_get_streaming_chat_message_contents( # endregion def _create_streaming_chat_message_content_from_chat_response( - self, response: ChatResponse + self, + response: ChatResponse, + function_invoke_attempt: int, ) -> StreamingChatMessageContent: """Create a chat message content from the response.""" items: list[STREAMING_ITEM_TYPES] = [] @@ -235,6 +238,7 @@ def _create_streaming_chat_message_content_from_chat_response( inner_content=response, ai_model_id=self.ai_model_id, metadata=self._get_metadata_from_chat_response(response), + function_invoke_attempt=function_invoke_attempt, ) def _parse_tool_calls(self, tool_calls: Sequence[Message.ToolCall] | None, items: list[Any]): @@ -299,7 +303,9 @@ def _create_chat_message_content(self, response: Mapping[str, Any]) -> ChatMessa metadata=self._get_metadata_from_response(response), ) - def _create_streaming_chat_message_content(self, part: Mapping[str, Any]) -> StreamingChatMessageContent: + def _create_streaming_chat_message_content( + self, part: Mapping[str, Any], function_invoke_attempt: int + ) -> StreamingChatMessageContent: """Create a streaming chat message content from the response part.""" items: list[STREAMING_ITEM_TYPES] = [] if not (message := part.get("message", None)): @@ -331,6 +337,7 @@ def _create_streaming_chat_message_content(self, part: Mapping[str, Any]) -> Str inner_content=part, ai_model_id=self.ai_model_id, metadata=self._get_metadata_from_response(part), + function_invoke_attempt=function_invoke_attempt, ) def _get_metadata_from_response(self, response: Mapping[str, Any]) -> dict[str, Any]: diff --git a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py index bb247cb55e43..28521975e366 100644 --- a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py @@ -109,6 +109,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: """Create streaming chat message contents, in the number specified by the settings. @@ -116,6 +117,7 @@ async def _inner_get_streaming_chat_message_contents( chat_history : A list of chat chat_history, that can be rendered into a set of chat_history, from system, user, assistant and function. settings : Settings for the request. + function_invoke_attempt : The function invoke attempt. Yields: A stream representing the response(s) from the LLM. @@ -127,7 +129,7 @@ async def _inner_get_streaming_chat_message_contents( images = self._get_images_from_history(chat_history) async for chunk in self._generate_next_token_async(prompt, settings, images): yield [ - self._create_streaming_chat_message_content(choice_index, new_token) + self._create_streaming_chat_message_content(choice_index, new_token, function_invoke_attempt) for choice_index, new_token in enumerate(chunk) ] @@ -142,12 +144,15 @@ def _create_chat_message_content(self, choice: str) -> ChatMessageContent: ], ) - def _create_streaming_chat_message_content(self, choice_index: int, choice: str) -> StreamingChatMessageContent: + def _create_streaming_chat_message_content( + self, choice_index: int, choice: str, function_invoke_attempt: int + ) -> StreamingChatMessageContent: return StreamingChatMessageContent( role=AuthorRole.ASSISTANT, choice_index=choice_index, content=choice, ai_model_id=self.ai_model_id, + function_invoke_attempt=function_invoke_attempt, ) @override diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py b/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py index 73e1a8fe62b7..03289fd45d58 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py @@ -148,9 +148,10 @@ def _create_streaming_chat_message_content( chunk: ChatCompletionChunk, choice: ChunkChoice, chunk_metadata: dict[str, Any], + function_invoke_attempt: int = 0, ) -> "StreamingChatMessageContent": """Create an Azure streaming chat message content object from a choice.""" - content = super()._create_streaming_chat_message_content(chunk, choice, chunk_metadata) + content = super()._create_streaming_chat_message_content(chunk, choice, chunk_metadata, function_invoke_attempt) assert isinstance(content, StreamingChatMessageContent) and isinstance(choice, ChunkChoice) # nosec return self._add_tool_message_to_chat_message_content(content, choice) diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py index ec918dee605d..0c1e843c5d47 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py @@ -96,6 +96,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, OpenAIChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -126,12 +127,13 @@ async def _inner_get_streaming_chat_message_contents( inner_content=chunk, ai_model_id=settings.ai_model_id, metadata=chunk_metadata, + function_invoke_attempt=function_invoke_attempt, ) for i in range(settings.number_of_responses or 1) ] else: yield [ - self._create_streaming_chat_message_content(chunk, choice, chunk_metadata) + self._create_streaming_chat_message_content(chunk, choice, chunk_metadata, function_invoke_attempt) for choice in chunk.choices ] @@ -190,6 +192,7 @@ def _create_streaming_chat_message_content( chunk: ChatCompletionChunk, choice: ChunkChoice, chunk_metadata: dict[str, Any], + function_invoke_attempt: int, ) -> StreamingChatMessageContent: """Create a streaming chat message content object from a choice.""" metadata = self._get_metadata_from_chat_choice(choice) @@ -207,6 +210,7 @@ def _create_streaming_chat_message_content( role=(AuthorRole(choice.delta.role) if choice.delta and choice.delta.role else AuthorRole.ASSISTANT), finish_reason=(FinishReason(choice.finish_reason) if choice.finish_reason else None), items=items, + function_invoke_attempt=function_invoke_attempt, ) def _get_metadata_from_chat_response(self, response: ChatCompletion) -> dict[str, Any]: diff --git a/python/semantic_kernel/contents/streaming_chat_message_content.py b/python/semantic_kernel/contents/streaming_chat_message_content.py index 51110b43ea5c..683b498d0c69 100644 --- a/python/semantic_kernel/contents/streaming_chat_message_content.py +++ b/python/semantic_kernel/contents/streaming_chat_message_content.py @@ -4,6 +4,8 @@ from typing import Any, Union, overload from xml.etree.ElementTree import Element # nosec +from pydantic import Field + from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent @@ -51,6 +53,12 @@ class StreamingChatMessageContent(ChatMessageContent, StreamingContentMixin): __add__: Combines two StreamingChatMessageContent instances. """ + function_invoke_attempt: int | None = Field( + default=0, + description="Tracks the current attempt count for automatically invoking functions. " + "This value increments with each subsequent automatic invocation attempt.", + ) + @overload def __init__( self, @@ -63,6 +71,7 @@ def __init__( finish_reason: FinishReason | None = None, ai_model_id: str | None = None, metadata: dict[str, Any] | None = None, + function_invoke_attempt: int | None = None, ) -> None: ... @overload @@ -77,6 +86,7 @@ def __init__( finish_reason: FinishReason | None = None, ai_model_id: str | None = None, metadata: dict[str, Any] | None = None, + function_invoke_attempt: int | None = None, ) -> None: ... def __init__( # type: ignore @@ -91,26 +101,30 @@ def __init__( # type: ignore finish_reason: FinishReason | None = None, ai_model_id: str | None = None, metadata: dict[str, Any] | None = None, + function_invoke_attempt: int | None = None, ): """Create a new instance of StreamingChatMessageContent. Args: - role: ChatRole - The role of the chat message. - choice_index: int - The index of the choice that generated this response. - items: list[TextContent, FunctionCallContent, FunctionResultContent, ImageContent] - The content. - content: str - The text of the response. - inner_content: Optional[Any] - The inner content of the response, + role: The role of the chat message. + choice_index: The index of the choice that generated this response. + items: The content. + content: The text of the response. + inner_content: The inner content of the response, this should hold all the information from the response so even when not creating a subclass a developer can leverage the full thing. - name: Optional[str] - The name of the response. - encoding: Optional[str] - The encoding of the text. - finish_reason: Optional[FinishReason] - The reason the response was finished. - metadata: Dict[str, Any] - Any metadata that should be attached to the response. - ai_model_id: Optional[str] - The id of the AI model that generated this response. + name: The name of the response. + encoding: The encoding of the text. + finish_reason: The reason the response was finished. + metadata: Any metadata that should be attached to the response. + ai_model_id: The id of the AI model that generated this response. + function_invoke_attempt: Tracks the current attempt count for automatically + invoking functions. This value increments with each subsequent automatic invocation attempt. """ kwargs: dict[str, Any] = { "role": role, "choice_index": choice_index, + "function_invoke_attempt": function_invoke_attempt, } if encoding: kwargs["encoding"] = encoding @@ -180,6 +194,7 @@ def __add__(self, other: "StreamingChatMessageContent") -> "StreamingChatMessage metadata=self.metadata | other.metadata, encoding=self.encoding, finish_reason=self.finish_reason or other.finish_reason, + function_invoke_attempt=self.function_invoke_attempt, ) def to_element(self) -> "Element": @@ -214,5 +229,6 @@ def __hash__(self) -> int: self.encoding, self.finish_reason, self.choice_index, + self.function_invoke_attempt, *self.items, )) diff --git a/python/tests/samples/test_concepts.py b/python/tests/samples/test_concepts.py index bf3ff42ede2c..6e8d4ad1a9e0 100644 --- a/python/tests/samples/test_concepts.py +++ b/python/tests/samples/test_concepts.py @@ -8,7 +8,9 @@ import pytest from pytest import mark, param -from samples.concepts.auto_function_calling.chat_gpt_api_function_calling import main as chat_gpt_api_function_calling +from samples.concepts.auto_function_calling.chat_completion_with_function_calling import ( + main as chat_completion_with_function_calling, +) from samples.concepts.auto_function_calling.functions_defined_in_json_prompt import ( main as function_defined_in_json_prompt, ) @@ -106,9 +108,9 @@ ), ), param( - chat_gpt_api_function_calling, + chat_completion_with_function_calling, ["What is 3+3?", "exit"], - id="chat_gpt_api_function_calling", + id="chat_completion_with_function_calling", marks=pytest.mark.skipif( os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." ),