Commit

Break the auto function calling samples into two: streaming and non-streaming
moonbox3 committed Dec 18, 2024
1 parent 69c7a6a commit ac5a105
Showing 2 changed files with 269 additions and 116 deletions.
@@ -1,7 +1,6 @@
 # Copyright (c) Microsoft. All rights reserved.
 
 import asyncio
-from functools import reduce
 from typing import TYPE_CHECKING
 
 from samples.concepts.setup.chat_completion_services import Services, get_chat_completion_service_and_request_settings
@@ -10,14 +9,21 @@
 from semantic_kernel.contents import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
-from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
-from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins.math_plugin import MathPlugin
 from semantic_kernel.core_plugins.time_plugin import TimePlugin
 from semantic_kernel.functions import KernelArguments
 
 if TYPE_CHECKING:
-    from semantic_kernel.functions import KernelFunction
+    pass
 
+#####################################################################
+# This sample demonstrates how to build a conversational chatbot    #
+# using Semantic Kernel, featuring dynamic function calling,        #
+# non-streaming responses, and support for math and time plugins.   #
+# The chatbot is designed to interact with the user, call functions #
+# as needed, and return responses. If auto function calling is      #
+# disabled, then the tool calls will be printed to the console.     #
+#####################################################################
+
 # System message defining the behavior and persona of the chat bot.
 system_message = """
@@ -33,9 +39,6 @@
 you will return a full answer to me as soon as possible.
 """
 
-# Toggle this flag to switch between streaming and non-streaming modes.
-stream = True
-
 # Create and configure the kernel.
 kernel = Kernel()
 
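The region collapsed between this hunk and the next registers the math and time plugins on the kernel. As a reference, a minimal sketch of that registration, assuming the standard Kernel.add_plugin API and these plugin names:

kernel.add_plugin(MathPlugin(), plugin_name="math")
kernel.add_plugin(TimePlugin(), plugin_name="time")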
@@ -62,7 +65,7 @@
 # - Services.ONNX
 # - Services.VERTEX_AI
 # Please make sure you have configured your environment correctly for the selected chat completion service.
-chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI)
+chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.OPENAI)
 
 # Configure the function choice behavior. Here, we set it to Auto with auto_invoke=True.
 # - If `auto_invoke=True`, the model will automatically choose and call functions as needed.
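For reference, wiring this behavior into the request settings looks roughly like the sketch below; the import path follows Semantic Kernel's Python package, and the exact call shape should be treated as an assumption rather than a quote from this sample:

from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior

# Auto mode: the model picks functions and the kernel invokes them.
request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto()

# Alternative: surface tool calls to the caller instead of invoking them.
request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=False)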
@@ -105,96 +108,6 @@ def print_tool_calls(message: ChatMessageContent) -> None:
print("\n[No tool calls returned by the model]")


async def handle_streaming(
kernel: Kernel,
chat_function: "KernelFunction",
arguments: KernelArguments,
) -> str | None:
"""
Handle the streaming response from the model.
This function demonstrates two possible paths:
1. When auto function calling is ON (auto_invoke=True):
- The model may call tools automatically and produce a continuous
stream of assistant messages. We can simply print these as they come in.
2. When auto function calling is OFF (auto_invoke=False):
- The model may instead return tool call instructions embedded in the stream.
We can track these calls using `function_invoke_attempt` attributes and print
them for the user. The user can then manually invoke the tools and return the results
to the model for further completion.
"""

response = kernel.invoke_stream(
chat_function,
return_function_results=False,
arguments=arguments,
)

# We will differentiate behavior based on whether auto invoking kernel functions is enabled.
auto_invoking = request_settings.function_choice_behavior.auto_invoke_kernel_functions

print("Mosscap:> ", end="", flush=True)

# If auto_invoking is False, the model may return separate streaming chunks containing tool instructions.
# We'll store them here.
streamed_tool_chunks: list[StreamingChatMessageContent] = []

# For content messages (the final assistant's response text), store them here.
streamed_response_chunks: list[StreamingChatMessageContent] = []

async for message in response:
msg = message[0]

# We only expect assistant messages here.
if not isinstance(msg, StreamingChatMessageContent) or msg.role != AuthorRole.ASSISTANT:
continue

if auto_invoking:
# When auto invocation is ON, no special handling is needed. Just print out messages as they arrive.
streamed_response_chunks.append(msg)
print(str(msg), end="", flush=True)
else:
# When auto invocation is OFF, the model may send chunks that represent tool calls.
# Chunks that contain function call instructions will have a function_invoke_attempt attribute.
if hasattr(msg, "function_invoke_attempt"):
# This chunk is part of a tool call instruction sequence
streamed_tool_chunks.append(msg)
else:
# This chunk is normal assistant response text
streamed_response_chunks.append(msg)
print(str(msg), end="", flush=True)

print("\n", flush=True)

# If auto function calling was OFF, handle any tool call instructions we captured.
if not auto_invoking and streamed_tool_chunks:
# Group streamed chunks by `function_invoke_attempt` to handle each invocation attempt separately.
grouped_chunks = {}
for chunk in streamed_tool_chunks:
key = getattr(chunk, "function_invoke_attempt", None)
if key is not None:
grouped_chunks.setdefault(key, []).append(chunk)

# Process each group of chunks
for attempt, chunks in grouped_chunks.items():
try:
# Combine all chunks for a given attempt into one message.
combined_content = reduce(lambda first, second: first + second, chunks)
if hasattr(combined_content, "content"):
print(f"[function_invoke_attempt {attempt} content]:\n{combined_content.content}")

print("[Auto function calling is OFF] Here are the returned tool calls:")
print_tool_calls(combined_content)
except Exception as e:
print(f"Error processing chunks for function_invoke_attempt {attempt}: {e}")

# Return the final concatenated assistant response (if any).
if streamed_response_chunks:
return "".join([str(content) for content in streamed_response_chunks])
return None


async def chat() -> bool:
"""
Continuously prompt the user for input and show the assistant's response.
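The removed handler leans on two properties of StreamingChatMessageContent: chunks from one response can be concatenated with `+`, and tool-call chunks carry a `function_invoke_attempt` index. A standalone sketch of that grouping step, assuming `streamed_tool_chunks` holds chunks collected as in the loop above:

from collections import defaultdict
from functools import reduce

# Group tool-call chunks by invocation attempt, then fold each group
# into one message whose `items` contain the FunctionCallContent entries.
grouped = defaultdict(list)
for chunk in streamed_tool_chunks:
    grouped[chunk.function_invoke_attempt].append(chunk)

for attempt, chunks in grouped.items():
    combined = reduce(lambda first, second: first + second, chunks)
    print(f"[function_invoke_attempt {attempt}]:\n{combined.content}")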
@@ -213,26 +126,22 @@ async def chat() -> bool:
     arguments["user_input"] = user_input
     arguments["chat_history"] = history
 
-    if stream:
-        # Handle streaming responses
-        result = await handle_streaming(kernel, chat_function, arguments=arguments)
-    else:
-        # Handle non-streaming responses
-        result = await kernel.invoke(chat_function, arguments=arguments)
+    # Handle non-streaming responses
+    result = await kernel.invoke(chat_function, arguments=arguments)
 
-        # If function calls are returned and auto invoking is off, we must show them.
-        if not request_settings.function_choice_behavior.auto_invoke_kernel_functions and result and result.value:
-            # Extract function calls from the returned content
-            function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)]
-            if len(function_calls) > 0:
-                print_tool_calls(result.value[0])
-                # At this point, you'd handle these calls manually if desired.
-                # For now, we just print them.
-                return True
+    # If function calls are returned and auto invoking is off, we must show them.
+    if not request_settings.function_choice_behavior.auto_invoke_kernel_functions and result and result.value:
+        # Extract function calls from the returned content
+        function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)]
+        if len(function_calls) > 0:
+            print_tool_calls(result.value[0])
+            # At this point, you'd handle these calls manually if desired.
+            # For now, we just print them.
+            return True
 
-        # If no function calls to handle, just print the assistant's response
-        if result:
-            print(f"Mosscap:> {result}")
+    # If no function calls to handle, just print the assistant's response
+    if result:
+        print(f"Mosscap:> {result}")
 
     # Update the chat history with the user's input and the assistant's response
     if result:
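When auto invoke is off, the calls printed above can also be executed by hand and fed back to the model. A sketch of that round trip, assuming Semantic Kernel's FunctionCallContent/FunctionResultContent helpers behave as named here:

from semantic_kernel.contents.function_result_content import FunctionResultContent

async def run_tool_calls_manually(function_calls, history):
    for call in function_calls:
        # Look up and invoke the kernel function the model requested.
        function = kernel.get_function(call.plugin_name, call.function_name)
        result = await function.invoke(kernel, call.to_kernel_arguments())
        # Hand the result back to the model through the chat history.
        history.add_message(
            FunctionResultContent.from_function_call_content_and_result(call, result).to_chat_message_content()
        )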