From 27a89bae54590b9d16133b9dedb672616ace5d3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E3=81=B5=E3=81=81=E3=83=BC?= <47295014+ymuichiro@users.noreply.github.com>
Date: Tue, 26 Nov 2024 07:39:07 +0900
Subject: [PATCH] Python: Introduced a new condition to yield
 `StreamingChatMessageContent` directly when usage data is available. (#9753)

### Motivation and Context

Issue: https://github.com/microsoft/semantic-kernel/issues/9751

This pull request addresses a bug where setting `stream_options.include_usage`
to `True` did not return token usage: the `usage` field was always `None`. The
issue occurs with Azure OpenAI's GPT-4o and GPT-4o mini models, which deliver
token usage in a final chunk whose `choices` list is empty. Because the
Azure-specific streaming code skipped every chunk with an empty `choices`
list, that final chunk was dropped and the usage data was never processed.
With this change, a chunk is processed whenever it carries `usage` data, even
if its `choices` list is empty.

### Description

This fix includes the following changes:

- Removed the Azure-specific `_inner_get_streaming_chat_message_contents`
  override in `azure_chat_completion.py`. Its unconditional
  `if len(chunk.choices) == 0: continue` skipped the final usage-only chunk;
  streaming now goes through `OpenAIChatCompletionBase`, which yields a
  `StreamingChatMessageContent` directly when `usage` data is available, even
  if `choices` is empty.
- Updated the streaming unit test to expect
  `stream_options={"include_usage": True}`, which the base implementation
  sends with every streaming request.

With these changes, setting `stream_options.include_usage` to `True` correctly
returns token usage data, even for chunks where the `choices` list is empty.
A standalone sketch of the resulting chunk handling follows the
`azure_chat_completion.py` diff below.

### Contribution Checklist

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone :smile:

---------

Co-authored-by: Tao Chen
---
 .../open_ai/services/azure_chat_completion.py | 53 ++-----------------
 .../services/test_azure_chat_completion.py    |  4 ++
 2 files changed, 7 insertions(+), 50 deletions(-)

diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py b/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py
index bd2a0ca51bab..73e1a8fe62b7 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py
@@ -2,18 +2,12 @@
 
 import json
 import logging
-import sys
-from collections.abc import AsyncGenerator, Mapping
+from collections.abc import Mapping
 from copy import deepcopy
 from typing import Any, TypeVar
 from uuid import uuid4
 
-if sys.version_info >= (3, 12):
-    from typing import override  # pragma: no cover
-else:
-    from typing_extensions import override  # pragma: no cover
-
-from openai import AsyncAzureOpenAI, AsyncStream
+from openai import AsyncAzureOpenAI
 from openai.lib.azure import AsyncAzureADTokenProvider
 from openai.types.chat.chat_completion import ChatCompletion, Choice
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
@@ -23,24 +17,19 @@
 from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import (
     AzureChatPromptExecutionSettings,
 )
-from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import (
-    OpenAIChatPromptExecutionSettings,
-)
 from semantic_kernel.connectors.ai.open_ai.services.azure_config_base import AzureOpenAIConfigBase
 from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base import OpenAIChatCompletionBase
 from semantic_kernel.connectors.ai.open_ai.services.open_ai_handler import OpenAIModelTypes
 from semantic_kernel.connectors.ai.open_ai.services.open_ai_text_completion_base import OpenAITextCompletionBase
 from semantic_kernel.connectors.ai.open_ai.settings.azure_open_ai_settings import AzureOpenAISettings
 from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
-from semantic_kernel.contents.chat_history import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.function_result_content import FunctionResultContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
 from semantic_kernel.contents.text_content import TextContent
 from semantic_kernel.contents.utils.finish_reason import FinishReason
-from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError, ServiceInvalidResponseError
-from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_streaming_chat_completion
+from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError
 
 logger: logging.Logger = logging.getLogger(__name__)
 
@@ -121,42 +110,6 @@ def __init__(
             client=async_client,
         )
 
-    @override
-    @trace_streaming_chat_completion(OpenAIChatCompletionBase.MODEL_PROVIDER_NAME)
-    async def _inner_get_streaming_chat_message_contents(
-        self,
-        chat_history: "ChatHistory",
-        settings: "PromptExecutionSettings",
-    ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]:
-        """Override the base method.
-
-        This is because the latest Azure OpenAI API GA version doesn't support `stream_option`
-        yet and it will potentially result in errors if the option is included.
-        This method will be called instead of the base method.
-        TODO: Remove this method when the `stream_option` is supported by the Azure OpenAI API.
-        GitHub Issue: https://github.com/microsoft/semantic-kernel/issues/8996
-        """
-        if not isinstance(settings, OpenAIChatPromptExecutionSettings):
-            settings = self.get_prompt_execution_settings_from_settings(settings)
-        assert isinstance(settings, OpenAIChatPromptExecutionSettings)  # nosec
-
-        settings.stream = True
-        settings.messages = self._prepare_chat_history_for_request(chat_history)
-        settings.ai_model_id = settings.ai_model_id or self.ai_model_id
-
-        response = await self._send_request(settings)
-        if not isinstance(response, AsyncStream):
-            raise ServiceInvalidResponseError("Expected an AsyncStream[ChatCompletionChunk] response.")
-        async for chunk in response:
-            if len(chunk.choices) == 0:
-                continue
-
-            assert isinstance(chunk, ChatCompletionChunk)  # nosec
-            chunk_metadata = self._get_metadata_from_streaming_chat_response(chunk)
-            yield [
-                self._create_streaming_chat_message_content(chunk, choice, chunk_metadata) for choice in chunk.choices
-            ]
-
     @classmethod
     def from_dict(cls, settings: dict[str, Any]) -> "AzureChatCompletion":
         """Initialize an Azure OpenAI service from a dictionary of settings.
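For illustration, the chunk-handling rule this change relies on can be sketched against the raw `openai` client (a minimal sketch, not the semantic-kernel source; the endpoint, key, deployment name, and API version below are placeholders):

```python
import asyncio

from openai import AsyncAzureOpenAI


async def stream_with_usage() -> None:
    # Placeholder connection details; substitute your own resource values.
    client = AsyncAzureOpenAI(
        azure_endpoint="https://my-resource.openai.azure.com",
        api_key="<api-key>",
        api_version="2024-10-21",
    )
    stream = await client.chat.completions.create(
        model="gpt-4o",  # on Azure this is the deployment name
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
        # Ask the service to append a final, choice-less chunk carrying token usage.
        stream_options={"include_usage": True},
    )
    async for chunk in stream:
        # The removed override skipped on `len(chunk.choices) == 0` alone, which
        # dropped the final usage-only chunk. Skip only when there is no usage either.
        if len(chunk.choices) == 0 and chunk.usage is None:
            continue
        if chunk.usage is not None:
            # The usage-only chunk: no choices, but prompt/completion token counts.
            print(f"\nprompt={chunk.usage.prompt_tokens} completion={chunk.usage.completion_tokens}")
        else:
            print(chunk.choices[0].delta.content or "", end="")


asyncio.run(stream_with_usage())
```

The only behavioral difference from the removed override is the second half of the `continue` condition: a choice-less chunk is kept when it carries `usage`.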
diff --git a/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py b/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py
index eaef9ff64931..a5e8ca638aab 100644
--- a/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py
+++ b/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py
@@ -948,4 +948,8 @@ async def test_cmc_streaming(
         model=azure_openai_unit_test_env["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
         stream=True,
         messages=azure_chat_completion._prepare_chat_history_for_request(chat_history),
+        # NOTE: `stream_options={"include_usage": True}` is explicitly enforced in
+        # `OpenAIChatCompletionBase._inner_get_streaming_chat_message_contents`,
+        # so the expected call arguments here are aligned with it.
+        stream_options={"include_usage": True},
     )
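For reference, the restored behavior can be observed end to end through semantic-kernel's streaming API. This is a hedged sketch: it assumes `AzureChatCompletion` picks up its configuration from `AZURE_OPENAI_*` environment variables, and the `metadata["usage"]` key is an assumption based on how the base class copies chunk metadata onto each yielded message:

```python
import asyncio

from semantic_kernel.connectors.ai.open_ai import (
    AzureChatCompletion,
    AzureChatPromptExecutionSettings,
)
from semantic_kernel.contents.chat_history import ChatHistory


async def main() -> None:
    service = AzureChatCompletion()  # configured via AZURE_OPENAI_* environment variables
    history = ChatHistory()
    history.add_user_message("Hello")
    settings = AzureChatPromptExecutionSettings()

    async for messages in service.get_streaming_chat_message_contents(history, settings):
        for message in messages:
            # Assumption: the final usage-only chunk surfaces its token counts in
            # the message metadata; before this fix, that chunk never arrived here.
            usage = message.metadata.get("usage")
            if usage:
                print("usage:", usage)


asyncio.run(main())
```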