Rename LLM and Embedding span attributes (#2270)
# Description

Rename LLM and Embedding span attributes to align with the OpenTelemetry
convention.
See #2266 for details.
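
For reference, the renames introduced by this PR are summarized below. The migration helper is only an illustrative sketch for consumers of previously stored traces; it is not code added by this change.

```python
# Old span attribute name -> new name (as introduced in this PR).
ATTRIBUTE_RENAMES = {
    "llm.token_count.prompt": "llm.usage.prompt_tokens",
    "llm.token_count.completion": "llm.usage.completion_tokens",
    "llm.token_count.total": "llm.usage.total_tokens",
    "llm.model": "llm.response.model",
    "embedding.model": "llm.response.model",
    "embedding.token_count.prompt": "llm.usage.prompt_tokens",
    "embedding.token_count.total": "llm.usage.total_tokens",
}


def migrate_span_attributes(attributes: dict) -> dict:
    """Illustrative helper: return a copy of a span's attributes with old keys renamed."""
    return {ATTRIBUTE_RENAMES.get(key, key): value for key, value in attributes.items()}
```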

# All Promptflow Contribution checklist:
- [ ] **The pull request does not introduce [breaking changes].**
- [ ] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [x] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [x] Title of the pull request is clear and informative.
- [x] There are a small number of commits, each of which has an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [ ] Pull request includes test coverage for the included changes.
zzn2 authored Mar 11, 2024
1 parent 626bf39 commit cd65b2e
Showing 4 changed files with 31 additions and 26 deletions.
6 changes: 3 additions & 3 deletions src/promptflow/promptflow/_constants.py
@@ -111,9 +111,9 @@ class SpanAttributeFieldName:
INPUTS = "inputs"
OUTPUT = "output"
# token metrics
COMPLETION_TOKEN_COUNT = "llm.token_count.completion"
PROMPT_TOKEN_COUNT = "llm.token_count.prompt"
TOTAL_TOKEN_COUNT = "llm.token_count.total"
COMPLETION_TOKEN_COUNT = "llm.usage.completion_tokens"
PROMPT_TOKEN_COUNT = "llm.usage.prompt_tokens"
TOTAL_TOKEN_COUNT = "llm.usage.total_tokens"
CUMULATIVE_COMPLETION_TOKEN_COUNT = "__computed__.cumulative_token_count.completion"
CUMULATIVE_PROMPT_TOKEN_COUNT = "__computed__.cumulative_token_count.prompt"
CUMULATIVE_TOTAL_TOKEN_COUNT = "__computed__.cumulative_token_count.total"
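A minimal sketch of how downstream code might read the renamed attributes through these constants instead of hard-coding the strings. The attribute values below are made up, and the snippet is not part of this commit; it assumes the promptflow package from this repo is installed.

```python
from promptflow._constants import SpanAttributeFieldName

# Hypothetical attributes of an exported LLM span, keyed by the renamed names.
span_attributes = {
    SpanAttributeFieldName.PROMPT_TOKEN_COUNT: 12,      # "llm.usage.prompt_tokens"
    SpanAttributeFieldName.COMPLETION_TOKEN_COUNT: 30,  # "llm.usage.completion_tokens"
    SpanAttributeFieldName.TOTAL_TOKEN_COUNT: 42,       # "llm.usage.total_tokens"
}

print(span_attributes[SpanAttributeFieldName.TOTAL_TOKEN_COUNT])
```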
17 changes: 9 additions & 8 deletions src/promptflow/promptflow/tracing/_trace.py
@@ -12,18 +12,18 @@
from typing import Callable, List, Optional

import opentelemetry.trace as otel_trace
from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.trace import Link
from opentelemetry.trace.status import StatusCode
from opentelemetry.trace.span import NonRecordingSpan
from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.trace.status import StatusCode

from promptflow._core.generator_proxy import GeneratorProxy
from promptflow._core.operation_context import OperationContext
from promptflow._utils.dataclass_serializer import serialize
from promptflow._utils.tool_utils import get_inputs_for_prompt_template, get_prompt_param_name_from_func

from .._utils.utils import default_json_encoder
from ._tracer import _create_trace_from_function_call, get_node_name_from_context, Tracer
from ._tracer import Tracer, _create_trace_from_function_call, get_node_name_from_context
from .contracts.trace import TraceType

IS_LEGACY_OPENAI = version("openai").startswith("0.")
@@ -146,6 +146,7 @@ def traced_generator(generator, original_span: ReadableSpan):
# TODO: Enrich LLM token count for streaming scenario
if original_span.attributes["span_type"] == "LLM" and not IS_LEGACY_OPENAI:
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

chunks = []
role = "assistant"
for item in generator_output:
@@ -181,7 +182,7 @@ def enrich_span_with_openai_tokens(span, trace_type):
if tokens:
span_tokens = {f"__computed__.cumulative_token_count.{k.split('_')[0]}": v for k, v in tokens.items()}
if trace_type in [TraceType.LLM, TraceType.EMBEDDING]:
llm_tokens = {f"{trace_type.value.lower()}.token_count.{k.split('_')[0]}": v for k, v in tokens.items()}
llm_tokens = {f"llm.usage.{k}": v for k, v in tokens.items()}
span_tokens.update(llm_tokens)
span.set_attributes(span_tokens)
except Exception as e:
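To make the rename concrete, here is a standalone sketch of the key construction above. The `tokens` dict is a hypothetical stand-in for the token counts reported by the OpenAI API; it is not part of the change itself.

```python
# Hypothetical usage payload, shaped like the token counts returned by the OpenAI API.
tokens = {"prompt_tokens": 12, "completion_tokens": 30, "total_tokens": 42}

# Cumulative keys keep the old "__computed__.cumulative_token_count.<part>" shape.
span_tokens = {f"__computed__.cumulative_token_count.{k.split('_')[0]}": v for k, v in tokens.items()}

# Per-span keys now reuse the OpenAI field names verbatim, e.g. "llm.usage.prompt_tokens".
llm_tokens = {f"llm.usage.{k}": v for k, v in tokens.items()}

span_tokens.update(llm_tokens)
print(span_tokens)
```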
@@ -193,7 +194,7 @@ def enrich_span_with_embedding(span, inputs, output):

try:
if isinstance(output, CreateEmbeddingResponse):
span.set_attribute("embedding.model", output.model)
span.set_attribute("llm.response.model", output.model)
embeddings = []
input_list = [emb_input] if _is_single_input(emb_input := inputs["input"]) else emb_input
for emb in output.data:
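The loop body is truncated in this hunk. Based only on the updated tests, which look for "embedding.vector" and "embedding.text" inside the "embedding.embeddings" attribute, a rough sketch of the shape being built might look like the following; field names and serialization details beyond those two keys are not confirmed by this diff.

```python
# Rough sketch only: pair each returned embedding with the input text/tokens that
# produced it. The stand-in data below is hypothetical, not promptflow's real objects.
input_list = ["Hello"]                        # stand-in for the (possibly single) embedding input
response_data = [{"embedding": [0.1, 0.2]}]   # stand-in for output.data

embeddings = [
    {"embedding.vector": item["embedding"], "embedding.text": text}
    for item, text in zip(response_data, input_list)
]
print(embeddings)
```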
@@ -212,10 +213,10 @@ def _is_single_input(embedding_inputs):
def _is_single_input(embedding_inputs):
# OpenAI Embedding API accepts a single string/tokenized string or a list of string/tokenized string as input.
# For the single string/tokenized string case, we should return true, otherwise return false.
if (isinstance(embedding_inputs, str)):
if isinstance(embedding_inputs, str):
# input is a string
return True
elif (isinstance(embedding_inputs, list) and all(isinstance(i, int) for i in embedding_inputs)):
elif isinstance(embedding_inputs, list) and all(isinstance(i, int) for i in embedding_inputs):
# input is a token array
return True
return False
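A quick usage sketch of the helper above. Importing a private function is done here purely for illustration, and the inputs are made up; the token-array example mirrors the `[9906]` case used in the tests.

```python
from promptflow.tracing._trace import _is_single_input

assert _is_single_input("Hello") is True               # single string
assert _is_single_input([9906]) is True                # single tokenized string (token array)
assert _is_single_input(["Hello", "world"]) is False   # list of strings -> batch input
```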
@@ -228,7 +229,7 @@ def enrich_span_with_llm_model(span, output):
from openai.types.completion import Completion

if isinstance(output, (ChatCompletion, Completion)):
span.set_attribute("llm.model", output.model)
span.set_attribute("llm.response.model", output.model)
except Exception as e:
logging.warning(f"Failed to enrich span with llm model: {e}")

16 changes: 9 additions & 7 deletions src/promptflow/tests/executor/e2etests/test_traces.py
@@ -30,14 +30,16 @@
]

LLM_TOKEN_NAMES = [
"llm.token_count.prompt",
"llm.token_count.completion",
"llm.token_count.total",
"llm.usage.prompt_tokens",
"llm.usage.completion_tokens",
"llm.usage.total_tokens",
"llm.response.model",
]

EMBEDDING_TOKEN_NAMES = [
"embedding.token_count.prompt",
"embedding.token_count.total",
"llm.usage.prompt_tokens",
"llm.usage.total_tokens",
"llm.response.model",
]

CUMULATIVE_LLM_TOKEN_NAMES = [
@@ -427,7 +429,7 @@ def assert_otel_traces_with_llm(self, dev_connections, flow_file, inputs, expect
self.validate_openai_tokens(span_list)
for span in span_list:
if span.attributes.get("function", "") in LLM_FUNCTION_NAMES:
assert span.attributes.get("llm.model", "") in ["gpt-35-turbo", "text-ada-001"]
assert span.attributes.get("llm.response.model", "") in ["gpt-35-turbo", "text-ada-001"]

@pytest.mark.parametrize(
"flow_file, inputs, expected_span_length",
@@ -463,7 +465,7 @@ def assert_otel_traces_with_embedding(self, dev_connections, flow_file, inputs,
self.validate_span_list(span_list, line_run_id, expected_span_length)
for span in span_list:
if span.attributes.get("function", "") in EMBEDDING_FUNCTION_NAMES:
assert span.attributes.get("embedding.model", "") == "ada"
assert span.attributes.get("llm.response.model", "") == "ada"
embeddings = span.attributes.get("embedding.embeddings", "")
assert "embedding.vector" in embeddings
assert "embedding.text" in embeddings
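A condensed sketch of the assertion pattern these tests now follow. The helper and the attribute dict below are placeholders rather than the real test fixtures; only the attribute names and the "gpt-35-turbo" value come from the diff.

```python
LLM_TOKEN_NAMES = [
    "llm.usage.prompt_tokens",
    "llm.usage.completion_tokens",
    "llm.usage.total_tokens",
    "llm.response.model",
]


def check_llm_span_attributes(attributes: dict) -> None:
    # Every renamed token/model attribute should be present on an LLM span.
    for name in LLM_TOKEN_NAMES:
        assert name in attributes, f"missing span attribute: {name}"


# Placeholder attributes standing in for a real exported span.
check_llm_span_attributes(
    {
        "llm.usage.prompt_tokens": 12,
        "llm.usage.completion_tokens": 30,
        "llm.usage.total_tokens": 42,
        "llm.response.model": "gpt-35-turbo",
    }
)
```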
18 changes: 10 additions & 8 deletions src/promptflow/tests/tracing_test/e2etests/test_trace.py
@@ -30,14 +30,16 @@
]

LLM_TOKEN_NAMES = [
"llm.token_count.prompt",
"llm.token_count.completion",
"llm.token_count.total",
"llm.usage.prompt_tokens",
"llm.usage.completion_tokens",
"llm.usage.total_tokens",
"llm.response.model",
]

EMBEDDING_TOKEN_NAMES = [
"embedding.token_count.prompt",
"embedding.token_count.total",
"llm.usage.prompt_tokens",
"llm.usage.total_tokens",
"llm.response.model",
]

CUMULATIVE_LLM_TOKEN_NAMES = [
@@ -77,7 +79,7 @@ def assert_otel_trace(self, func, inputs, expected_span_length):
"func, inputs",
[
(render_prompt_template, {"prompt": "Hello {{name}}!", "name": "world"}),
]
],
)
def test_otel_trace_with_prompt(self, func, inputs):
execute_function_in_subprocess(self.assert_otel_traces_with_prompt, func, inputs)
@@ -130,7 +132,7 @@ def assert_otel_trace_with_llm(self, dev_connections, func, inputs, expected_spa
(openai_embedding_async, {"input": "Hello"}, 2),
# [9906] is the tokenized version of "Hello"
(openai_embedding_async, {"input": [9906]}, 2),
]
],
)
def test_otel_trace_with_embedding(
self,
@@ -156,7 +158,7 @@ def assert_otel_traces_with_embedding(self, dev_connections, func, inputs, expec
self.validate_openai_tokens(span_list)
for span in span_list:
if span.attributes.get("function", "") in EMBEDDING_FUNCTION_NAMES:
assert span.attributes.get("embedding.model", "") == "ada"
assert span.attributes.get("llm.response.model", "") == "ada"
embeddings = span.attributes.get("embedding.embeddings", "")
assert "embedding.vector" in embeddings
assert "embedding.text" in embeddings
