Rename LLM and Embedding span attributes (#2270)
# Description

Rename LLM and Embedding span attributes to align with the OpenTelemetry
convention.
See #2266 for details.
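
For reference, the renames introduced by this PR are summarized below. The migration helper is only an illustrative sketch for consumers of previously stored traces; it is not code added by this change.

```python
# Old span attribute name -> new name (as introduced in this PR).
ATTRIBUTE_RENAMES = {
    "llm.token_count.prompt": "llm.usage.prompt_tokens",
    "llm.token_count.completion": "llm.usage.completion_tokens",
    "llm.token_count.total": "llm.usage.total_tokens",
    "llm.model": "llm.response.model",
    "embedding.model": "llm.response.model",
    "embedding.token_count.prompt": "llm.usage.prompt_tokens",
    "embedding.token_count.total": "llm.usage.total_tokens",
}


def migrate_span_attributes(attributes: dict) -> dict:
    """Illustrative helper: return a copy of a span's attributes with old keys renamed."""
    return {ATTRIBUTE_RENAMES.get(key, key): value for key, value in attributes.items()}
```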

# All Promptflow Contribution checklist:
- [ ] **The pull request does not introduce [breaking changes].**
- [ ] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [x] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [x] Title of the pull request is clear and informative.
- [x] There are a small number of commits, each of which has an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [ ] Pull request includes test coverage for the included changes.
zzn2 authored Mar 11, 2024
1 parent 626bf39 commit cd65b2e
Showing 4 changed files with 31 additions and 26 deletions.
6 changes: 3 additions & 3 deletions src/promptflow/promptflow/_constants.py
@@ -111,9 +111,9 @@ class SpanAttributeFieldName:
INPUTS = "inputs"
OUTPUT = "output"
# token metrics
COMPLETION_TOKEN_COUNT = "llm.token_count.completion"
PROMPT_TOKEN_COUNT = "llm.token_count.prompt"
TOTAL_TOKEN_COUNT = "llm.token_count.total"
COMPLETION_TOKEN_COUNT = "llm.usage.completion_tokens"
PROMPT_TOKEN_COUNT = "llm.usage.prompt_tokens"
TOTAL_TOKEN_COUNT = "llm.usage.total_tokens"
CUMULATIVE_COMPLETION_TOKEN_COUNT = "__computed__.cumulative_token_count.completion"
CUMULATIVE_PROMPT_TOKEN_COUNT = "__computed__.cumulative_token_count.prompt"
CUMULATIVE_TOTAL_TOKEN_COUNT = "__computed__.cumulative_token_count.total"
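A minimal sketch of how downstream code might read the renamed attributes through these constants instead of hard-coding the strings. The attribute values below are made up, and the snippet is not part of this commit; it assumes the promptflow package from this repo is installed.

```python
from promptflow._constants import SpanAttributeFieldName

# Hypothetical attributes of an exported LLM span, keyed by the renamed names.
span_attributes = {
    SpanAttributeFieldName.PROMPT_TOKEN_COUNT: 12,      # "llm.usage.prompt_tokens"
    SpanAttributeFieldName.COMPLETION_TOKEN_COUNT: 30,  # "llm.usage.completion_tokens"
    SpanAttributeFieldName.TOTAL_TOKEN_COUNT: 42,       # "llm.usage.total_tokens"
}

print(span_attributes[SpanAttributeFieldName.TOTAL_TOKEN_COUNT])
```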
17 changes: 9 additions & 8 deletions src/promptflow/promptflow/tracing/_trace.py
@@ -12,18 +12,18 @@
from typing import Callable, List, Optional

import opentelemetry.trace as otel_trace
from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.trace import Link
from opentelemetry.trace.status import StatusCode
from opentelemetry.trace.span import NonRecordingSpan
from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.trace.status import StatusCode

from promptflow._core.generator_proxy import GeneratorProxy
from promptflow._core.operation_context import OperationContext
from promptflow._utils.dataclass_serializer import serialize
from promptflow._utils.tool_utils import get_inputs_for_prompt_template, get_prompt_param_name_from_func

from .._utils.utils import default_json_encoder
from ._tracer import _create_trace_from_function_call, get_node_name_from_context, Tracer
from ._tracer import Tracer, _create_trace_from_function_call, get_node_name_from_context
from .contracts.trace import TraceType

IS_LEGACY_OPENAI = version("openai").startswith("0.")
@@ -146,6 +146,7 @@ def traced_generator(generator, original_span: ReadableSpan):
# TODO: Enrich LLM token count for streaming scenario
if original_span.attributes["span_type"] == "LLM" and not IS_LEGACY_OPENAI:
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

chunks = []
role = "assistant"
for item in generator_output:
@@ -181,7 +182,7 @@ def enrich_span_with_openai_tokens(span, trace_type):
if tokens:
span_tokens = {f"__computed__.cumulative_token_count.{k.split('_')[0]}": v for k, v in tokens.items()}
if trace_type in [TraceType.LLM, TraceType.EMBEDDING]:
llm_tokens = {f"{trace_type.value.lower()}.token_count.{k.split('_')[0]}": v for k, v in tokens.items()}
llm_tokens = {f"llm.usage.{k}": v for k, v in tokens.items()}
span_tokens.update(llm_tokens)
span.set_attributes(span_tokens)
except Exception as e:
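To make the rename concrete, here is a standalone sketch of the key construction above. The `tokens` dict is a hypothetical stand-in for the token counts reported by the OpenAI API; it is not part of the change itself.

```python
# Hypothetical usage payload, shaped like the token counts returned by the OpenAI API.
tokens = {"prompt_tokens": 12, "completion_tokens": 30, "total_tokens": 42}

# Cumulative keys keep the old "__computed__.cumulative_token_count.<part>" shape.
span_tokens = {f"__computed__.cumulative_token_count.{k.split('_')[0]}": v for k, v in tokens.items()}

# Per-span keys now reuse the OpenAI field names verbatim, e.g. "llm.usage.prompt_tokens".
llm_tokens = {f"llm.usage.{k}": v for k, v in tokens.items()}

span_tokens.update(llm_tokens)
print(span_tokens)
```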
@@ -193,7 +194,7 @@ def enrich_span_with_embedding(span, inputs, output):

try:
if isinstance(output, CreateEmbeddingResponse):
span.set_attribute("embedding.model", output.model)
span.set_attribute("llm.response.model", output.model)
embeddings = []
input_list = [emb_input] if _is_single_input(emb_input := inputs["input"]) else emb_input
for emb in output.data:
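The loop body is truncated in this hunk. Based only on the updated tests, which look for "embedding.vector" and "embedding.text" inside the "embedding.embeddings" attribute, a rough sketch of the shape being built might look like the following; field names and serialization details beyond those two keys are not confirmed by this diff.

```python
# Rough sketch only: pair each returned embedding with the input text/tokens that
# produced it. The stand-in data below is hypothetical, not promptflow's real objects.
input_list = ["Hello"]                        # stand-in for the (possibly single) embedding input
response_data = [{"embedding": [0.1, 0.2]}]   # stand-in for output.data

embeddings = [
    {"embedding.vector": item["embedding"], "embedding.text": text}
    for item, text in zip(response_data, input_list)
]
print(embeddings)
```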
@@ -212,10 +213,10 @@ def _is_single_input(embedding_inputs):
def _is_single_input(embedding_inputs):
# OpenAI Embedding API accepts a single string/tokenized string or a list of string/tokenized string as input.
# For the single string/tokenized string case, we should return true, otherwise return false.
if (isinstance(embedding_inputs, str)):
if isinstance(embedding_inputs, str):
# input is a string
return True
elif (isinstance(embedding_inputs, list) and all(isinstance(i, int) for i in embedding_inputs)):
elif isinstance(embedding_inputs, list) and all(isinstance(i, int) for i in embedding_inputs):
# input is a token array
return True
return False
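A quick usage sketch of the helper above. Importing a private function is done here purely for illustration, and the inputs are made up; the token-array example mirrors the `[9906]` case used in the tests.

```python
from promptflow.tracing._trace import _is_single_input

assert _is_single_input("Hello") is True               # single string
assert _is_single_input([9906]) is True                # single tokenized string (token array)
assert _is_single_input(["Hello", "world"]) is False   # list of strings -> batch input
```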
@@ -228,7 +229,7 @@ def enrich_span_with_llm_model(span, output):
from openai.types.completion import Completion

if isinstance(output, (ChatCompletion, Completion)):
span.set_attribute("llm.model", output.model)
span.set_attribute("llm.response.model", output.model)
except Exception as e:
logging.warning(f"Failed to enrich span with llm model: {e}")

16 changes: 9 additions & 7 deletions src/promptflow/tests/executor/e2etests/test_traces.py
@@ -30,14 +30,16 @@
]

LLM_TOKEN_NAMES = [
"llm.token_count.prompt",
"llm.token_count.completion",
"llm.token_count.total",
"llm.usage.prompt_tokens",
"llm.usage.completion_tokens",
"llm.usage.total_tokens",
"llm.response.model",
]

EMBEDDING_TOKEN_NAMES = [
"embedding.token_count.prompt",
"embedding.token_count.total",
"llm.usage.prompt_tokens",
"llm.usage.total_tokens",
"llm.response.model",
]

CUMULATIVE_LLM_TOKEN_NAMES = [
@@ -427,7 +429,7 @@ def assert_otel_traces_with_llm(self, dev_connections, flow_file, inputs, expect
self.validate_openai_tokens(span_list)
for span in span_list:
if span.attributes.get("function", "") in LLM_FUNCTION_NAMES:
assert span.attributes.get("llm.model", "") in ["gpt-35-turbo", "text-ada-001"]
assert span.attributes.get("llm.response.model", "") in ["gpt-35-turbo", "text-ada-001"]

@pytest.mark.parametrize(
"flow_file, inputs, expected_span_length",
@@ -463,7 +465,7 @@ def assert_otel_traces_with_embedding(self, dev_connections, flow_file, inputs,
self.validate_span_list(span_list, line_run_id, expected_span_length)
for span in span_list:
if span.attributes.get("function", "") in EMBEDDING_FUNCTION_NAMES:
assert span.attributes.get("embedding.model", "") == "ada"
assert span.attributes.get("llm.response.model", "") == "ada"
embeddings = span.attributes.get("embedding.embeddings", "")
assert "embedding.vector" in embeddings
assert "embedding.text" in embeddings
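A condensed sketch of the assertion pattern these tests now follow. The helper and the attribute dict below are placeholders rather than the real test fixtures; only the attribute names and the "gpt-35-turbo" value come from the diff.

```python
LLM_TOKEN_NAMES = [
    "llm.usage.prompt_tokens",
    "llm.usage.completion_tokens",
    "llm.usage.total_tokens",
    "llm.response.model",
]


def check_llm_span_attributes(attributes: dict) -> None:
    # Every renamed token/model attribute should be present on an LLM span.
    for name in LLM_TOKEN_NAMES:
        assert name in attributes, f"missing span attribute: {name}"


# Placeholder attributes standing in for a real exported span.
check_llm_span_attributes(
    {
        "llm.usage.prompt_tokens": 12,
        "llm.usage.completion_tokens": 30,
        "llm.usage.total_tokens": 42,
        "llm.response.model": "gpt-35-turbo",
    }
)
```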
18 changes: 10 additions & 8 deletions src/promptflow/tests/tracing_test/e2etests/test_trace.py
@@ -30,14 +30,16 @@
]

LLM_TOKEN_NAMES = [
"llm.token_count.prompt",
"llm.token_count.completion",
"llm.token_count.total",
"llm.usage.prompt_tokens",
"llm.usage.completion_tokens",
"llm.usage.total_tokens",
"llm.response.model",
]

EMBEDDING_TOKEN_NAMES = [
"embedding.token_count.prompt",
"embedding.token_count.total",
"llm.usage.prompt_tokens",
"llm.usage.total_tokens",
"llm.response.model",
]

CUMULATIVE_LLM_TOKEN_NAMES = [
@@ -77,7 +79,7 @@ def assert_otel_trace(self, func, inputs, expected_span_length):
"func, inputs",
[
(render_prompt_template, {"prompt": "Hello {{name}}!", "name": "world"}),
]
],
)
def test_otel_trace_with_prompt(self, func, inputs):
execute_function_in_subprocess(self.assert_otel_traces_with_prompt, func, inputs)
@@ -130,7 +132,7 @@ def assert_otel_trace_with_llm(self, dev_connections, func, inputs, expected_spa
(openai_embedding_async, {"input": "Hello"}, 2),
# [9906] is the tokenized version of "Hello"
(openai_embedding_async, {"input": [9906]}, 2),
]
],
)
def test_otel_trace_with_embedding(
self,
@@ -156,7 +158,7 @@ def assert_otel_traces_with_embedding(self, dev_connections, func, inputs, expec
self.validate_openai_tokens(span_list)
for span in span_list:
if span.attributes.get("function", "") in EMBEDDING_FUNCTION_NAMES:
assert span.attributes.get("embedding.model", "") == "ada"
assert span.attributes.get("llm.response.model", "") == "ada"
embeddings = span.attributes.get("embedding.embeddings", "")
assert "embedding.vector" in embeddings
assert "embedding.text" in embeddings
