From 8464649f4375d60e2a606bfb13b9d2f72578a484 Mon Sep 17 00:00:00 2001
From: Evan Mattson
Date: Thu, 19 Dec 2024 10:01:16 +0900
Subject: [PATCH] Break samples down further - streaming and non-streaming
 auto invoke and manual invoke

---
 ...t_completion_with_auto_function_calling.py | 125 +++++++++++++
 ...on_with_auto_function_calling_streaming.py | 169 ++++++++++++++++++
 ...ompletion_with_manual_function_calling.py} |  14 +-
 ...with_manual_function_calling_streaming.py} |  32 ++--
 4 files changed, 313 insertions(+), 27 deletions(-)
 create mode 100644 python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling.py
 create mode 100644 python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling_streaming.py
 rename python/samples/concepts/auto_function_calling/{chat_completion_with_function_calling.py => chat_completion_with_manual_function_calling.py} (93%)
 rename python/samples/concepts/auto_function_calling/{chat_completion_with_function_calling_streaming.py => chat_completion_with_manual_function_calling_streaming.py} (88%)

diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling.py b/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling.py
new file mode 100644
index 000000000000..2dad6517e31c
--- /dev/null
+++ b/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling.py
@@ -0,0 +1,125 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from samples.concepts.setup.chat_completion_services import Services, get_chat_completion_service_and_request_settings
+from semantic_kernel import Kernel
+from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
+from semantic_kernel.contents import ChatHistory
+from semantic_kernel.core_plugins.math_plugin import MathPlugin
+from semantic_kernel.core_plugins.time_plugin import TimePlugin
+from semantic_kernel.functions import KernelArguments
+
+######################################################################
+# This sample demonstrates how to build a conversational chatbot    #
+# using Semantic Kernel, featuring auto function calling,           #
+# non-streaming responses, and support for math and time plugins.   #
+# The chatbot is designed to interact with the user, call functions #
+# as needed, and return responses.                                  #
+######################################################################
+
+# System message defining the behavior and persona of the chat bot.
+system_message = """
+You are a chat bot. Your name is Mosscap and
+you have one goal: figure out what people need.
+Your full name, should you need to know it, is
+Splendid Speckled Mosscap. You communicate
+effectively, but you tend to answer with long
+flowery prose. You are also a math wizard,
+especially for adding and subtracting.
+You also excel at joke telling, where your tone is often sarcastic.
+Once you have the answer I am looking for,
+you will return a full answer to me as soon as possible.
+"""
+
+# Create and configure the kernel.
+kernel = Kernel()
+
+# Load some sample plugins (for demonstration of function calling).
+kernel.add_plugin(MathPlugin(), plugin_name="math")
+kernel.add_plugin(TimePlugin(), plugin_name="time")
+
+# Define a chat function (a template for how to handle user input).
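+# The prompt template exposes two variables: `{{$chat_history}}` renders the
+# conversation so far, and `{{$user_input}}` appends the latest user message.
+# Both are supplied on every turn through the KernelArguments below.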
+chat_function = kernel.add_function(
+    prompt="{{$chat_history}}{{$user_input}}",
+    plugin_name="ChatBot",
+    function_name="Chat",
+)
+
+# You can select from the following chat completion services that support function calling:
+# - Services.OPENAI
+# - Services.AZURE_OPENAI
+# - Services.AZURE_AI_INFERENCE
+# - Services.ANTHROPIC
+# - Services.BEDROCK
+# - Services.GOOGLE_AI
+# - Services.MISTRAL_AI
+# - Services.OLLAMA
+# - Services.ONNX
+# - Services.VERTEX_AI
+# Please make sure you have configured your environment correctly for the selected chat completion service.
+chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI)
+
+# Configure the function choice behavior. Here, we set it to Auto; auto_invoke defaults to True.
+# With `auto_invoke=True`, the model will automatically choose and call functions as needed.
+request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto()
+
+kernel.add_service(chat_completion_service)
+
+# Pass the request settings to the kernel arguments.
+arguments = KernelArguments(settings=request_settings)
+
+# Create a chat history to store the system message, initial messages, and the conversation.
+history = ChatHistory()
+history.add_system_message(system_message)
+history.add_user_message("Hi there, who are you?")
+history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.")
+
+
+async def chat() -> bool:
+    """
+    Continuously prompt the user for input and show the assistant's response.
+    Type 'exit' to exit.
+    """
+    try:
+        user_input = input("User:> ")
+    except (KeyboardInterrupt, EOFError):
+        print("\n\nExiting chat...")
+        return False
+
+    if user_input.lower().strip() == "exit":
+        print("\n\nExiting chat...")
+        return False
+
+    arguments["user_input"] = user_input
+    arguments["chat_history"] = history
+
+    # Handle the non-streaming response.
+    result = await kernel.invoke(chat_function, arguments=arguments)
+
+    # Update the chat history with the user's input and the assistant's response.
+    if result:
+        print(f"Mosscap:> {result}")
+        history.add_user_message(user_input)
+        history.add_assistant_message(str(result))
+
+    return True
+
+
+async def main() -> None:
+    print(
+        "Welcome to the chat bot!\n"
+        "  Type 'exit' to exit.\n"
+        "  Try a math question to see function calling in action (e.g. 'what is 3+3?')."
+    )
+    chatting = True
+    while chatting:
+        chatting = await chat()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling_streaming.py b/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling_streaming.py
new file mode 100644
index 000000000000..86435032ba4d
--- /dev/null
+++ b/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling_streaming.py
@@ -0,0 +1,169 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+from typing import TYPE_CHECKING
+
+from samples.concepts.setup.chat_completion_services import Services, get_chat_completion_service_and_request_settings
+from semantic_kernel import Kernel
+from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
+from semantic_kernel.contents import ChatHistory
+from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
+from semantic_kernel.core_plugins.math_plugin import MathPlugin
+from semantic_kernel.core_plugins.time_plugin import TimePlugin
+from semantic_kernel.functions import KernelArguments
+
+if TYPE_CHECKING:
+    from semantic_kernel.functions import KernelFunction
+
+######################################################################
+# This sample demonstrates how to build a conversational chatbot    #
+# using Semantic Kernel, featuring auto function calling,           #
+# streaming responses, and support for math and time plugins.       #
+# The chatbot is designed to interact with the user, call functions #
+# as needed, and return responses.                                  #
+######################################################################
+
+# System message defining the behavior and persona of the chat bot.
+system_message = """
+You are a chat bot. Your name is Mosscap and
+you have one goal: figure out what people need.
+Your full name, should you need to know it, is
+Splendid Speckled Mosscap. You communicate
+effectively, but you tend to answer with long
+flowery prose. You are also a math wizard,
+especially for adding and subtracting.
+You also excel at joke telling, where your tone is often sarcastic.
+Once you have the answer I am looking for,
+you will return a full answer to me as soon as possible.
+"""
+
+# Create and configure the kernel.
+kernel = Kernel()
+
+# Load some sample plugins (for demonstration of function calling).
+kernel.add_plugin(MathPlugin(), plugin_name="math")
+kernel.add_plugin(TimePlugin(), plugin_name="time")
+
+# Define a chat function (a template for how to handle user input).
+chat_function = kernel.add_function(
+    prompt="{{$chat_history}}{{$user_input}}",
+    plugin_name="ChatBot",
+    function_name="Chat",
+)
+
+# You can select from the following chat completion services that support function calling:
+# - Services.OPENAI
+# - Services.AZURE_OPENAI
+# - Services.AZURE_AI_INFERENCE
+# - Services.ANTHROPIC
+# - Services.BEDROCK
+# - Services.GOOGLE_AI
+# - Services.MISTRAL_AI
+# - Services.OLLAMA
+# - Services.ONNX
+# - Services.VERTEX_AI
+# Please make sure you have configured your environment correctly for the selected chat completion service.
+chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI)
+
+# Configure the function choice behavior. Here, we set it to Auto; auto_invoke defaults to True.
+# With `auto_invoke=True`, the model will automatically choose and call functions as needed.
+request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto()
+
+kernel.add_service(chat_completion_service)
+
+# Pass the request settings to the kernel arguments.
+arguments = KernelArguments(settings=request_settings)
+
+# Create a chat history to store the system message, initial messages, and the conversation.
+history = ChatHistory()
+history.add_system_message(system_message)
+history.add_user_message("Hi there, who are you?")
+history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.")
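+
+
+# kernel.invoke_stream yields lists of StreamingChatMessageContent chunks; the
+# handler below reads the first entry of each list, prints assistant chunks as
+# they arrive, and joins them into the final reply text.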
+async def handle_streaming(
+    kernel: Kernel,
+    chat_function: "KernelFunction",
+    arguments: KernelArguments,
+) -> str | None:
+    """
+    Handle the streaming response from the model.
+
+    This sample uses auto function calling (auto_invoke=True): the model may
+    call tools automatically and produce a continuous stream of assistant
+    messages, so we can simply print the chunks as they come in.
+    """
+
+    response = kernel.invoke_stream(
+        chat_function,
+        return_function_results=False,
+        arguments=arguments,
+    )
+
+    print("Mosscap:> ", end="", flush=True)
+
+    # Collect the assistant's content chunks (the final response text) here.
+    streamed_response_chunks: list[StreamingChatMessageContent] = []
+
+    async for message in response:
+        msg = message[0]
+
+        # We only expect assistant messages here.
+        if not isinstance(msg, StreamingChatMessageContent) or msg.role != AuthorRole.ASSISTANT:
+            continue
+
+        # When auto invocation is ON, no special handling is needed. Just print out messages as they arrive.
+        streamed_response_chunks.append(msg)
+        print(str(msg), end="", flush=True)
+
+    print("\n", flush=True)
+
+    # Return the final concatenated assistant response (if any).
+    if streamed_response_chunks:
+        return "".join(str(content) for content in streamed_response_chunks)
+    return None
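+
+
+# The chat loop below feeds each user turn through the streaming handler and
+# records both sides of the exchange in the shared ChatHistory. A one-off,
+# hypothetical invocation outside the loop would look like:
+#
+#     arguments["user_input"] = "What time is it?"
+#     arguments["chat_history"] = history
+#     reply = await handle_streaming(kernel, chat_function, arguments=arguments)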
+async def chat() -> bool:
+    """
+    Continuously prompt the user for input and show the assistant's response.
+    Type 'exit' to exit.
+    """
+    try:
+        user_input = input("User:> ")
+    except (KeyboardInterrupt, EOFError):
+        print("\n\nExiting chat...")
+        return False
+
+    if user_input.lower().strip() == "exit":
+        print("\n\nExiting chat...")
+        return False
+
+    arguments["user_input"] = user_input
+    arguments["chat_history"] = history
+
+    result = await handle_streaming(kernel, chat_function, arguments=arguments)
+
+    # Update the chat history with the user's input and the assistant's response.
+    if result:
+        history.add_user_message(user_input)
+        history.add_assistant_message(str(result))
+
+    return True
+
+
+async def main() -> None:
+    print(
+        "Welcome to the chat bot!\n"
+        "  Type 'exit' to exit.\n"
+        "  Try a math question to see function calling in action (e.g. 'what is 3+3?')."
+    )
+    chatting = True
+    while chatting:
+        chatting = await chat()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py
similarity index 93%
rename from python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py
rename to python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py
index 92d92f17db49..978fa3ea24ec 100644
--- a/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py
+++ b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py
@@ -18,11 +18,11 @@
 
 ######################################################################
 # This sample demonstrates how to build a conversational chatbot    #
-# using Semantic Kernel, featuring dynamic function calling,        #
+# using Semantic Kernel, featuring manual function calling,         #
 # non-streaming responses, and support for math and time plugins.   #
 # The chatbot is designed to interact with the user, call functions #
-# as needed, and return responses. If auto function calling is      #
-# disabled, then the tool calls will be printed to the console.     #
+# as needed, and return responses. With auto function calling       #
+# disabled, the tool calls will be printed to the console.          #
 ######################################################################
 
 # System message defining the behavior and persona of the chat bot.
@@ -67,10 +67,10 @@
 # Please make sure you have configured your environment correctly for the selected chat completion service.
 chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.OPENAI)
 
-# Configure the function choice behavior. Here, we set it to Auto with auto_invoke=True.
-# - If `auto_invoke=True`, the model will automatically choose and call functions as needed.
-# - If `auto_invoke=False`, the model may return tool call instructions that you must handle and call manually.
-request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=True)
+# Configure the function choice behavior. Here, we set it to Auto with auto_invoke=False.
+# With `FunctionChoiceBehavior.Auto(auto_invoke=False)`, the model may return tool call instructions
+# that you must handle and call manually. We will only print the tool calls in this sample.
+request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=False)
 
 kernel.add_service(chat_completion_service)
 
diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling_streaming.py b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling_streaming.py
similarity index 88%
rename from python/samples/concepts/auto_function_calling/chat_completion_with_function_calling_streaming.py
rename to python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling_streaming.py
index e8c6412263cd..b749ff2a87a0 100644
--- a/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling_streaming.py
+++ b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling_streaming.py
@@ -70,10 +70,10 @@
 # Please make sure you have configured your environment correctly for the selected chat completion service.
 chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI)
 
-# Configure the function choice behavior. Here, we set it to Auto with auto_invoke=True.
-# - If `auto_invoke=True`, the model will automatically choose and call functions as needed.
-# - If `auto_invoke=False`, the model may return tool call instructions that you must handle and call manually.
-request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=True)
+# Configure the function choice behavior. Here, we set it to Auto with auto_invoke=False.
+# With `FunctionChoiceBehavior.Auto(auto_invoke=False)`, the model may return tool call instructions
+# that you must handle and call manually. We will only print the tool calls in this sample.
+request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=False)
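+# Note (illustrative): on a math turn such as "what is 3+3?", the stream is
+# expected to carry FunctionCallContent items (e.g. for math-Add) instead of
+# an executed result; this sample collects and prints those tool calls.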
 
 kernel.add_service(chat_completion_service)
 
@@ -137,9 +137,6 @@ async def handle_streaming(
         arguments=arguments,
     )
 
-    # We will differentiate behavior based on whether auto invoking kernel functions is enabled.
-    auto_invoking = request_settings.function_choice_behavior.auto_invoke_kernel_functions
-
     print("Mosscap:> ", end="", flush=True)
 
     # If auto_invoking is False, the model may return separate streaming chunks containing tool instructions.
@@ -156,25 +153,20 @@
         if not isinstance(msg, StreamingChatMessageContent) or msg.role != AuthorRole.ASSISTANT:
             continue
 
-        if auto_invoking:
-            # When auto invocation is ON, no special handling is needed. Just print out messages as they arrive.
+        # When auto invocation is OFF, the model may send chunks that represent tool calls.
+        # Chunks that contain function call instructions will have a function_invoke_attempt attribute.
+        if hasattr(msg, "function_invoke_attempt"):
+            # This chunk is part of a tool call instruction sequence.
+            streamed_tool_chunks.append(msg)
+        else:
+            # This chunk is normal assistant response text.
             streamed_response_chunks.append(msg)
             print(str(msg), end="", flush=True)
-        else:
-            # When auto invocation is OFF, the model may send chunks that represent tool calls.
-            # Chunks that contain function call instructions will have a function_invoke_attempt attribute.
-            if hasattr(msg, "function_invoke_attempt"):
-                # This chunk is part of a tool call instruction sequence
-                streamed_tool_chunks.append(msg)
-            else:
-                # This chunk is normal assistant response text
-                streamed_response_chunks.append(msg)
-                print(str(msg), end="", flush=True)
 
     print("\n", flush=True)
 
     # If auto function calling was OFF, handle any tool call instructions we captured.
-    if not auto_invoking and streamed_tool_chunks:
+    if streamed_tool_chunks:
         # Group streamed chunks by `function_invoke_attempt` to handle each invocation attempt separately.
         grouped_chunks = {}
         for chunk in streamed_tool_chunks: