diff --git a/python/.cspell.json b/python/.cspell.json index 804c4ebfa4c6..949cce6e3c9a 100644 --- a/python/.cspell.json +++ b/python/.cspell.json @@ -55,6 +55,8 @@ "huggingface", "pytestmark", "contoso", - "opentelemetry" + "opentelemetry", + "SEMANTICKERNEL", + "OTEL" ] } \ No newline at end of file diff --git a/python/samples/demos/telemetry_with_application_insights/.env.example b/python/samples/demos/telemetry_with_application_insights/.env.example index 3ee18ae9e6b0..be404f15d7ff 100644 --- a/python/samples/demos/telemetry_with_application_insights/.env.example +++ b/python/samples/demos/telemetry_with_application_insights/.env.example @@ -1 +1,3 @@ -TELEMETRY_SAMPLE_CONNECTION_STRING="..." \ No newline at end of file +TELEMETRY_SAMPLE_CONNECTION_STRING="..." +SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS=true +SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE=true \ No newline at end of file diff --git a/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py b/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py index 3b9a7de99182..94c4c77d98ea 100644 --- a/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py @@ -1,8 +1,14 @@ # Copyright (c) Microsoft. All rights reserved. import logging +import sys from collections.abc import AsyncGenerator -from typing import Any +from typing import Any, ClassVar + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover from anthropic import AsyncAnthropic from anthropic.types import ( @@ -29,11 +35,9 @@ from semantic_kernel.contents.text_content import TextContent from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.contents.utils.finish_reason import FinishReason as SemanticKernelFinishReason -from semantic_kernel.exceptions.service_exceptions import ( - ServiceInitializationError, - ServiceResponseException, -) +from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError, ServiceResponseException from semantic_kernel.utils.experimental_decorator import experimental_class +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_chat_completion # map finish reasons from Anthropic to Semantic Kernel ANTHROPIC_TO_SEMANTIC_KERNEL_FINISH_REASON_MAP = { @@ -49,8 +53,10 @@ class AnthropicChatCompletion(ChatCompletionClientBase): """Antropic ChatCompletion class.""" + MODEL_PROVIDER_NAME: ClassVar[str] = "anthropic" + async_client: AsyncAnthropic - + def __init__( self, ai_model_id: str | None = None, @@ -68,10 +74,10 @@ def __init__( service_id: Service ID tied to the execution settings. api_key: The optional API key to use. If provided will override, the env vars or .env file value. - async_client: An existing client to use. + async_client: An existing client to use. env_file_path: Use the environment settings file as a fallback - to environment variables. - env_file_encoding: The encoding of the environment settings file. + to environment variables. + env_file_encoding: The encoding of the environment settings file. 
""" try: anthropic_settings = AnthropicSettings.create( @@ -82,7 +88,7 @@ def __init__( ) except ValidationError as ex: raise ServiceInitializationError("Failed to create Anthropic settings.", ex) from ex - + if not anthropic_settings.chat_model_id: raise ServiceInitializationError("The Anthropic chat model ID is required.") @@ -97,12 +103,14 @@ def __init__( ai_model_id=anthropic_settings.chat_model_id, ) + @override + @trace_chat_completion(MODEL_PROVIDER_NAME) async def get_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", **kwargs: Any, - ) -> list["ChatMessageContent"]: + ) -> list["ChatMessageContent"]: """Executes a chat completion request and returns the result. Args: @@ -127,22 +135,23 @@ async def get_chat_message_contents( raise ServiceResponseException( f"{type(self)} service failed to complete the prompt", ex, - ) from ex - + ) from ex + metadata: dict[str, Any] = {"id": response.id} # Check if usage exists and has a value, then add it to the metadata if hasattr(response, "usage") and response.usage is not None: metadata["usage"] = response.usage - return [self._create_chat_message_content(response, content_block, metadata) - for content_block in response.content] - + return [ + self._create_chat_message_content(response, content_block, metadata) for content_block in response.content + ] + async def get_streaming_chat_message_contents( self, chat_history: ChatHistory, - settings: PromptExecutionSettings, + settings: PromptExecutionSettings, **kwargs: Any, - ) -> AsyncGenerator[list[StreamingChatMessageContent], Any]: + ) -> AsyncGenerator[list[StreamingChatMessageContent], Any]: """Executes a streaming chat completion request and returns the result. Args: @@ -166,17 +175,18 @@ async def get_streaming_chat_message_contents( author_role = None metadata: dict[str, Any] = {"usage": {}, "id": None} content_block_idx = 0 - + async for stream_event in stream: if isinstance(stream_event, RawMessageStartEvent): author_role = stream_event.message.role metadata["usage"]["input_tokens"] = stream_event.message.usage.input_tokens metadata["id"] = stream_event.message.id elif isinstance(stream_event, (RawContentBlockDeltaEvent, RawMessageDeltaEvent)): - yield [self._create_streaming_chat_message_content(stream_event, - content_block_idx, - author_role, - metadata)] + yield [ + self._create_streaming_chat_message_content( + stream_event, content_block_idx, author_role, metadata + ) + ] elif isinstance(stream_event, ContentBlockStopEvent): content_block_idx += 1 @@ -187,21 +197,18 @@ async def get_streaming_chat_message_contents( ) from ex def _create_chat_message_content( - self, - response: Message, - content: TextBlock, - response_metadata: dict[str, Any] + self, response: Message, content: TextBlock, response_metadata: dict[str, Any] ) -> "ChatMessageContent": """Create a chat message content object.""" items: list[ITEM_TYPES] = [] - + if content.text: items.append(TextContent(text=content.text)) finish_reason = None if response.stop_reason: finish_reason = ANTHROPIC_TO_SEMANTIC_KERNEL_FINISH_REASON_MAP[response.stop_reason] - + return ChatMessageContent( inner_content=response, ai_model_id=self.ai_model_id, @@ -212,20 +219,20 @@ def _create_chat_message_content( ) def _create_streaming_chat_message_content( - self, - stream_event: RawContentBlockDeltaEvent | RawMessageDeltaEvent, - content_block_idx: int, - role: str | None = None, - metadata: dict[str, Any] = {} + self, + stream_event: RawContentBlockDeltaEvent | RawMessageDeltaEvent, + 
content_block_idx: int, + role: str | None = None, + metadata: dict[str, Any] = {}, ) -> StreamingChatMessageContent: """Create a streaming chat message content object from a choice.""" text_content = "" - + if stream_event.delta and hasattr(stream_event.delta, "text"): text_content = stream_event.delta.text - + items: list[STREAMING_ITEM_TYPES] = [StreamingTextContent(choice_index=content_block_idx, text=text_content)] - + finish_reason = None if isinstance(stream_event, RawMessageDeltaEvent): if stream_event.delta.stop_reason: @@ -246,4 +253,3 @@ def _create_streaming_chat_message_content( def get_prompt_execution_settings_class(self) -> "type[AnthropicChatPromptExecutionSettings]": """Create a request settings object.""" return AnthropicChatPromptExecutionSettings - diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py index 32550fa71697..3d64c38ce5bc 100644 --- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py +++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py @@ -3,6 +3,7 @@ import asyncio import contextlib from abc import ABC +from typing import ClassVar from azure.ai.inference.aio import ChatCompletionsClient, EmbeddingsClient @@ -14,6 +15,8 @@ class AzureAIInferenceBase(KernelBaseModel, ABC): """Azure AI Inference Chat Completion Service.""" + MODEL_PROVIDER_NAME: ClassVar[str] = "azureai" + client: ChatCompletionsClient | EmbeddingsClient def __del__(self) -> None: diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py index 92fe5bb2af71..b56562fc8a35 100644 --- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py @@ -7,6 +7,7 @@ from functools import reduce from typing import TYPE_CHECKING, Any +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_chat_completion from semantic_kernel.utils.telemetry.user_agent import SEMANTIC_KERNEL_USER_AGENT if sys.version_info >= (3, 12): @@ -119,6 +120,8 @@ def __init__( ) # region Non-streaming + @override + @trace_chat_completion(AzureAIInferenceBase.MODEL_PROVIDER_NAME) async def get_chat_message_contents( self, chat_history: ChatHistory, diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_base.py b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_base.py index 91446835302d..5bbc19568bc1 100644 --- a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_base.py +++ b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_base.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. 
from abc import ABC +from typing import ClassVar from semantic_kernel.connectors.ai.google.google_ai.google_ai_settings import GoogleAISettings from semantic_kernel.kernel_pydantic import KernelBaseModel @@ -9,4 +10,6 @@ class GoogleAIBase(KernelBaseModel, ABC): """Google AI Service.""" + MODEL_PROVIDER_NAME: ClassVar[str] = "googleai" + service_settings: GoogleAISettings diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py index 2b65dc298111..c33affe047cb 100644 --- a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py @@ -40,6 +40,7 @@ from semantic_kernel.contents.utils.finish_reason import FinishReason from semantic_kernel.functions.kernel_arguments import KernelArguments from semantic_kernel.kernel import Kernel +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_chat_completion if sys.version_info >= (3, 12): from typing import override # pragma: no cover @@ -109,6 +110,7 @@ def __init__( # region Non-streaming @override + @trace_chat_completion(GoogleAIBase.MODEL_PROVIDER_NAME) async def get_chat_message_contents( self, chat_history: ChatHistory, diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_text_completion.py b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_text_completion.py index a38201db6b67..3590b8b4d51c 100644 --- a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_text_completion.py +++ b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_text_completion.py @@ -15,6 +15,7 @@ ) from semantic_kernel.connectors.ai.google.google_ai.services.google_ai_base import GoogleAIBase from semantic_kernel.connectors.ai.text_completion_client_base import TextCompletionClientBase +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_text_completion if sys.version_info >= (3, 12): from typing import override # pragma: no cover @@ -78,6 +79,7 @@ def __init__( # region Non-streaming @override + @trace_text_completion(GoogleAIBase.MODEL_PROVIDER_NAME) async def get_text_contents( self, prompt: str, diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_base.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_base.py index e17b1994424d..29e5d2502b63 100644 --- a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_base.py +++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_base.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. 
from abc import ABC +from typing import ClassVar from semantic_kernel.connectors.ai.google.vertex_ai.vertex_ai_settings import VertexAISettings from semantic_kernel.kernel_pydantic import KernelBaseModel @@ -9,4 +10,6 @@ class VertexAIBase(KernelBaseModel, ABC): """Vertex AI Service.""" + MODEL_PROVIDER_NAME: ClassVar[str] = "vertexai" + service_settings: VertexAISettings diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py index b2519d1e5edc..53116630b632 100644 --- a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py @@ -45,6 +45,7 @@ ) from semantic_kernel.functions.kernel_arguments import KernelArguments from semantic_kernel.kernel import Kernel +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_chat_completion if sys.version_info >= (3, 12): from typing import override # pragma: no cover @@ -103,6 +104,7 @@ def __init__( # region Non-streaming @override + @trace_chat_completion(VertexAIBase.MODEL_PROVIDER_NAME) async def get_chat_message_contents( self, chat_history: ChatHistory, diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_text_completion.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_text_completion.py index 6919b6ba521e..e874ba21f254 100644 --- a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_text_completion.py +++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_text_completion.py @@ -19,6 +19,7 @@ from semantic_kernel.contents.streaming_text_content import StreamingTextContent from semantic_kernel.contents.text_content import TextContent from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_text_completion if sys.version_info >= (3, 12): from typing import override # pragma: no cover @@ -74,6 +75,7 @@ def __init__( # region Non-streaming @override + @trace_text_completion(VertexAIBase.MODEL_PROVIDER_NAME) async def get_text_contents( self, prompt: str, diff --git a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_base.py b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_base.py new file mode 100644 index 000000000000..0e18409f9e08 --- /dev/null +++ b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_base.py @@ -0,0 +1,16 @@ +# Copyright (c) Microsoft. All rights reserved. + +from abc import ABC +from typing import ClassVar + +from mistralai.async_client import MistralAsyncClient + +from semantic_kernel.kernel_pydantic import KernelBaseModel + + +class MistralAIBase(KernelBaseModel, ABC): + """Mistral AI service base.""" + + MODEL_PROVIDER_NAME: ClassVar[str] = "mistralai" + + async_client: MistralAsyncClient diff --git a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py index ffd6bc2594ad..fc23b451d253 100644 --- a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py @@ -1,9 +1,15 @@ # Copyright (c) Microsoft. All rights reserved. 
import logging +import sys from collections.abc import AsyncGenerator from typing import Any +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + from mistralai.async_client import MistralAsyncClient from mistralai.models.chat_completion import ( ChatCompletionResponse, @@ -19,6 +25,7 @@ from semantic_kernel.connectors.ai.mistral_ai.prompt_execution_settings.mistral_ai_prompt_execution_settings import ( MistralAIChatPromptExecutionSettings, ) +from semantic_kernel.connectors.ai.mistral_ai.services.mistral_ai_base import MistralAIBase from semantic_kernel.connectors.ai.mistral_ai.settings.mistral_ai_settings import MistralAISettings from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_history import ChatHistory @@ -29,23 +36,20 @@ from semantic_kernel.contents.text_content import TextContent from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.contents.utils.finish_reason import FinishReason -from semantic_kernel.exceptions.service_exceptions import ( - ServiceInitializationError, - ServiceResponseException, -) +from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError, ServiceResponseException from semantic_kernel.utils.experimental_decorator import experimental_class +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_chat_completion logger: logging.Logger = logging.getLogger(__name__) @experimental_class -class MistralAIChatCompletion(ChatCompletionClientBase): +class MistralAIChatCompletion(MistralAIBase, ChatCompletionClientBase): """Mistral Chat completion class.""" prompt_tokens: int = 0 completion_tokens: int = 0 total_tokens: int = 0 - async_client: MistralAsyncClient def __init__( self, @@ -64,10 +68,10 @@ def __init__( service_id (str | None): Service ID tied to the execution settings. api_key (str | None): The optional API key to use. If provided will override, the env vars or .env file value. - async_client (MistralAsyncClient | None) : An existing client to use. + async_client (MistralAsyncClient | None) : An existing client to use. env_file_path (str | None): Use the environment settings file as a fallback - to environment variables. - env_file_encoding (str | None): The encoding of the environment settings file. + to environment variables. + env_file_encoding (str | None): The encoding of the environment settings file. """ try: mistralai_settings = MistralAISettings.create( @@ -78,7 +82,7 @@ def __init__( ) except ValidationError as ex: raise ServiceInitializationError("Failed to create MistralAI settings.", ex) from ex - + if not mistralai_settings.chat_model_id: raise ServiceInitializationError("The MistralAI chat model ID is required.") @@ -93,12 +97,14 @@ def __init__( ai_model_id=ai_model_id or mistralai_settings.chat_model_id, ) + @override + @trace_chat_completion(MistralAIBase.MODEL_PROVIDER_NAME) async def get_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", **kwargs: Any, - ) -> list["ChatMessageContent"]: + ) -> list["ChatMessageContent"]: """Executes a chat completion request and returns the result. 
Args: @@ -124,18 +130,18 @@ async def get_chat_message_contents( raise ServiceResponseException( f"{type(self)} service failed to complete the prompt", ex, - ) from ex - + ) from ex + self.store_usage(response) response_metadata = self._get_metadata_from_response(response) return [self._create_chat_message_content(response, choice, response_metadata) for choice in response.choices] - + async def get_streaming_chat_message_contents( self, chat_history: ChatHistory, - settings: PromptExecutionSettings, + settings: PromptExecutionSettings, **kwargs: Any, - ) -> AsyncGenerator[list[StreamingChatMessageContent], Any]: + ) -> AsyncGenerator[list[StreamingChatMessageContent], Any]: """Executes a streaming chat completion request and returns the result. Args: @@ -181,7 +187,7 @@ def _create_chat_message_content( metadata.update(response_metadata) items: list[Any] = self._get_tool_calls_from_chat_choice(choice) - + if choice.message.content: items.append(TextContent(text=choice.message.content)) @@ -220,8 +226,7 @@ def _create_streaming_chat_message_content( ) def _get_metadata_from_response( - self, - response: ChatCompletionResponse | ChatCompletionStreamResponse + self, response: ChatCompletionResponse | ChatCompletionStreamResponse ) -> dict[str, Any]: """Get metadata from a chat response.""" metadata: dict[str, Any] = { @@ -231,27 +236,26 @@ def _get_metadata_from_response( # Check if usage exists and has a value, then add it to the metadata if hasattr(response, "usage") and response.usage is not None: metadata["usage"] = response.usage - + return metadata def _get_metadata_from_chat_choice( - self, - choice: ChatCompletionResponseChoice | ChatCompletionResponseStreamChoice + self, choice: ChatCompletionResponseChoice | ChatCompletionResponseStreamChoice ) -> dict[str, Any]: """Get metadata from a chat choice.""" return { "logprobs": getattr(choice, "logprobs", None), } - - def _get_tool_calls_from_chat_choice(self, - choice: ChatCompletionResponseChoice | ChatCompletionResponseStreamChoice + + def _get_tool_calls_from_chat_choice( + self, choice: ChatCompletionResponseChoice | ChatCompletionResponseStreamChoice ) -> list[FunctionCallContent]: """Get tool calls from a chat choice.""" - content: ChatMessage | DeltaMessage + content: ChatMessage | DeltaMessage content = choice.message if isinstance(choice, ChatCompletionResponseChoice) else choice.delta if content.tool_calls is None: return [] - + return [ FunctionCallContent( id=tool.id, @@ -267,7 +271,7 @@ def _get_tool_calls_from_chat_choice(self, def get_prompt_execution_settings_class(self) -> "type[MistralAIChatPromptExecutionSettings]": """Create a request settings object.""" return MistralAIChatPromptExecutionSettings - + def store_usage(self, response): """Store the usage information from the response.""" if not isinstance(response, AsyncGenerator): diff --git a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_text_embedding.py b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_text_embedding.py index 24b2905b1587..8bf76e5303b1 100644 --- a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_text_embedding.py +++ b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_text_embedding.py @@ -6,6 +6,7 @@ from typing import Any, override # pragma: no cover else: from typing_extensions import Any, override # pragma: no cover + import logging from mistralai.async_client import MistralAsyncClient @@ -14,6 +15,7 @@ from pydantic import ValidationError from 
semantic_kernel.connectors.ai.embeddings.embedding_generator_base import EmbeddingGeneratorBase +from semantic_kernel.connectors.ai.mistral_ai.services.mistral_ai_base import MistralAIBase from semantic_kernel.connectors.ai.mistral_ai.settings.mistral_ai_settings import MistralAISettings from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError, ServiceResponseException @@ -23,19 +25,17 @@ @experimental_class -class MistralAITextEmbedding(EmbeddingGeneratorBase): +class MistralAITextEmbedding(MistralAIBase, EmbeddingGeneratorBase): """Mistral AI Inference Text Embedding Service.""" - client: MistralAsyncClient - def __init__( self, ai_model_id: str | None = None, api_key: str | None = None, service_id: str | None = None, + async_client: MistralAsyncClient | None = None, env_file_path: str | None = None, env_file_encoding: str | None = None, - client: MistralAsyncClient | None = None, ) -> None: """Initialize the Mistral AI Text Embedding service. @@ -45,12 +45,12 @@ def __init__( - MISTRALAI_EMBEDDING_MODEL_ID Args: - ai_model_id: (str | None): A string that is used to identify the model such as the model name. - api_key (str | None): The API key for the Mistral AI service deployment. - service_id (str | None): Service ID for the embedding completion service. - env_file_path (str | None): The path to the environment file. - env_file_encoding (str | None): The encoding of the environment file. - client (MistralAsyncClient | None): The Mistral AI client to use. + ai_model_id: (str | None): A string that is used to identify the model such as the model name. + api_key (str | None): The API key for the Mistral AI service deployment. + service_id (str | None): Service ID for the embedding completion service. + async_client (MistralAsyncClient | None): The Mistral AI client to use. + env_file_path (str | None): The path to the environment file. + env_file_encoding (str | None): The encoding of the environment file. Raises: ServiceInitializationError: If an error occurs during initialization. 
@@ -68,15 +68,13 @@ def __init__( if not mistralai_settings.embedding_model_id: raise ServiceInitializationError("The MistralAI embedding model ID is required.") - if not client: - client = MistralAsyncClient( - api_key=mistralai_settings.api_key.get_secret_value() - ) + if not async_client: + async_client = MistralAsyncClient(api_key=mistralai_settings.api_key.get_secret_value()) super().__init__( service_id=service_id or mistralai_settings.embedding_model_id, ai_model_id=ai_model_id or mistralai_settings.embedding_model_id, - client=client, + async_client=async_client, ) @override @@ -98,10 +96,8 @@ async def generate_raw_embeddings( ) -> Any: """Generate embeddings from the Mistral AI service.""" try: - - embedding_response: EmbeddingResponse = await self.client.embeddings( - model=self.ai_model_id, - input=texts + embedding_response: EmbeddingResponse = await self.async_client.embeddings( + model=self.ai_model_id, input=texts ) except Exception as ex: raise ServiceResponseException( diff --git a/python/semantic_kernel/connectors/ai/ollama/ollama_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/ollama/ollama_prompt_execution_settings.py index 7e365bea3d5c..6ff69be7dc12 100644 --- a/python/semantic_kernel/connectors/ai/ollama/ollama_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/ollama/ollama_prompt_execution_settings.py @@ -11,6 +11,9 @@ class OllamaPromptExecutionSettings(PromptExecutionSettings): format: Literal["json"] | None = None options: dict[str, Any] | None = None + # TODO(@taochen): Add individual properties for execution settings and + # convert them to the appropriate types in the options dictionary. + class OllamaTextPromptExecutionSettings(OllamaPromptExecutionSettings): """Settings for Ollama text prompt execution.""" diff --git a/python/semantic_kernel/connectors/ai/ollama/services/ollama_base.py b/python/semantic_kernel/connectors/ai/ollama/services/ollama_base.py index ceffb48d9dbf..f03ad0e994d1 100644 --- a/python/semantic_kernel/connectors/ai/ollama/services/ollama_base.py +++ b/python/semantic_kernel/connectors/ai/ollama/services/ollama_base.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. from abc import ABC +from typing import ClassVar from ollama import AsyncClient @@ -14,4 +15,6 @@ class OllamaBase(KernelBaseModel, ABC): client [AsyncClient]: An Ollama client to use for the service. 
""" + MODEL_PROVIDER_NAME: ClassVar[str] = "ollama" + client: AsyncClient diff --git a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py index 1c3ffe3080b7..2e9adb09fddb 100644 --- a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py @@ -25,6 +25,7 @@ from semantic_kernel.contents.streaming_text_content import StreamingTextContent from semantic_kernel.contents.text_content import TextContent from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError, ServiceInvalidResponseError +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_chat_completion, trace_text_completion if TYPE_CHECKING: from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings @@ -74,6 +75,8 @@ def __init__( client=client or AsyncClient(host=ollama_settings.host), ) + @override + @trace_chat_completion(OllamaBase.MODEL_PROVIDER_NAME) async def get_chat_message_contents( self, chat_history: ChatHistory, @@ -162,6 +165,8 @@ async def get_streaming_chat_message_contents( ) ] + @override + @trace_text_completion(OllamaBase.MODEL_PROVIDER_NAME) async def get_text_contents( self, prompt: str, diff --git a/python/semantic_kernel/connectors/ai/ollama/services/ollama_text_completion.py b/python/semantic_kernel/connectors/ai/ollama/services/ollama_text_completion.py index 351c4e768fea..e02f98723d96 100644 --- a/python/semantic_kernel/connectors/ai/ollama/services/ollama_text_completion.py +++ b/python/semantic_kernel/connectors/ai/ollama/services/ollama_text_completion.py @@ -20,6 +20,7 @@ from semantic_kernel.contents.streaming_text_content import StreamingTextContent from semantic_kernel.contents.text_content import TextContent from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError, ServiceInvalidResponseError +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_text_completion if TYPE_CHECKING: from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings @@ -69,6 +70,8 @@ def __init__( client=client or AsyncClient(host=ollama_settings.host), ) + @override + @trace_text_completion(OllamaBase.MODEL_PROVIDER_NAME) async def get_text_contents( self, prompt: str, diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py index e23cd9799e61..786be4efb996 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py @@ -43,7 +43,7 @@ from semantic_kernel.filters.auto_function_invocation.auto_function_invocation_context import ( AutoFunctionInvocationContext, ) -from semantic_kernel.utils.telemetry.decorators import trace_chat_completion +from semantic_kernel.utils.telemetry.model_diagnostics import trace_chat_completion if TYPE_CHECKING: from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_completion_base.py index fbcb90767e46..40b445cce480 100644 --- 
a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_completion_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_completion_base.py @@ -26,7 +26,7 @@ from semantic_kernel.connectors.ai.text_completion_client_base import TextCompletionClientBase from semantic_kernel.contents.streaming_text_content import StreamingTextContent from semantic_kernel.contents.text_content import TextContent -from semantic_kernel.utils.telemetry.decorators import trace_text_completion +from semantic_kernel.utils.telemetry.model_diagnostics import trace_text_completion if TYPE_CHECKING: from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings diff --git a/python/semantic_kernel/utils/telemetry/decorators.py b/python/semantic_kernel/utils/telemetry/decorators.py deleted file mode 100644 index 366168ae3938..000000000000 --- a/python/semantic_kernel/utils/telemetry/decorators.py +++ /dev/null @@ -1,265 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. -# -# Code to trace model activities with the OTel semantic conventions. -# This code contains experimental features and may change in the future. -# To enable these features, set one of the following senvironment variables to true: -# SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS -# SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE - -import functools -import json -import os -from collections.abc import Callable -from typing import Any - -from opentelemetry.trace import Span, StatusCode, get_tracer, use_span - -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings -from semantic_kernel.contents.chat_history import ChatHistory -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.text_content import TextContent -from semantic_kernel.utils.telemetry.const import ( - CHAT_COMPLETION_OPERATION, - COMPLETION_EVENT, - COMPLETION_EVENT_COMPLETION, - COMPLETION_TOKENS, - ERROR_TYPE, - FINISH_REASON, - MAX_TOKENS, - MODEL, - OPERATION, - PROMPT_EVENT, - PROMPT_EVENT_PROMPT, - PROMPT_TOKENS, - RESPONSE_ID, - SYSTEM, - TEMPERATURE, - TEXT_COMPLETION_OPERATION, - TOP_P, -) - -OTEL_ENABLED_ENV_VAR = "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS" -OTEL_SENSITIVE_ENABLED_ENV_VAR = "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE" - - -_enable_diagnostics = os.getenv(OTEL_ENABLED_ENV_VAR, "false").lower() in ("true", "1", "t") -_enable_sensitive_events = os.getenv(OTEL_SENSITIVE_ENABLED_ENV_VAR, "false").lower() in ("true", "1", "t") - -# Creates a tracer from the global tracer provider -tracer = get_tracer(__name__) - - -def are_model_diagnostics_enabled() -> bool: - """Check if model diagnostics are enabled. - - Model diagnostics are enabled if either _enable_diagnostics or _enable_sensitive_events is set. - """ - return _enable_diagnostics or _enable_sensitive_events - - -def are_sensitive_events_enabled() -> bool: - """Check if sensitive events are enabled. - - Sensitive events are enabled if _enable_sensitive_events is set. 
- """ - return _enable_sensitive_events - - -def trace_chat_completion(model_provider: str) -> Callable: - """Decorator to trace chat completion activities.""" - - def inner_trace_chat_completion(completion_func: Callable) -> Callable: - @functools.wraps(completion_func) - async def wrapper_decorator(*args: Any, **kwargs: Any) -> list[ChatMessageContent]: - chat_history: ChatHistory = kwargs["chat_history"] - settings: PromptExecutionSettings = kwargs["settings"] - - model_name = getattr(settings, "ai_model_id", None) or getattr(args[0], "ai_model_id", None) or "unknown" - - formatted_messages = ( - _messages_to_openai_format(chat_history.messages) if are_sensitive_events_enabled() else None - ) - span = _start_completion_activity( - CHAT_COMPLETION_OPERATION, model_name, model_provider, formatted_messages, settings - ) - - try: - completions: list[ChatMessageContent] = await completion_func(*args, **kwargs) - except Exception as exception: - if span: - _set_completion_error(span, exception) - span.end() - raise - - if span and completions: - with use_span(span, end_on_exit=True): - first_completion = completions[0] - response_id = first_completion.metadata.get("id") or (first_completion.inner_content or {}).get( - "id" - ) - usage = first_completion.metadata.get("usage", None) - prompt_tokens = getattr(usage, "prompt_tokens", None) - completion_tokens = getattr(usage, "completion_tokens", None) - - completion_text: str | None = ( - _messages_to_openai_format(completions) if are_sensitive_events_enabled() else None - ) - - finish_reasons: list[str] = [str(completion.finish_reason) for completion in completions] - - _set_completion_response( - span, - completion_text, - finish_reasons, - response_id or "unknown", - prompt_tokens, - completion_tokens, - ) - - return completions - - return wrapper_decorator - - return inner_trace_chat_completion - - -def trace_text_completion(model_provider: str) -> Callable: - """Decorator to trace text completion activities.""" - - def inner_trace_text_completion(completion_func: Callable) -> Callable: - @functools.wraps(completion_func) - async def wrapper_decorator(*args: Any, **kwargs: Any) -> list[TextContent]: - prompt: str = kwargs["prompt"] - settings: PromptExecutionSettings = kwargs["settings"] - - model_name = getattr(settings, "ai_model_id", None) or getattr(args[0], "ai_model_id", None) or "unknown" - - span = _start_completion_activity(TEXT_COMPLETION_OPERATION, model_name, model_provider, prompt, settings) - - try: - completions: list[TextContent] = await completion_func(*args, **kwargs) - except Exception as exception: - if span: - _set_completion_error(span, exception) - span.end() - raise - - if span and completions: - with use_span(span, end_on_exit=True): - first_completion = completions[0] - response_id = first_completion.metadata.get("id") or (first_completion.inner_content or {}).get( - "id" - ) - usage = first_completion.metadata.get("usage", None) - prompt_tokens = getattr(usage, "prompt_tokens", None) - completion_tokens = getattr(usage, "completion_tokens", None) - - completion_text: str | None = ( - json.dumps([completion.text for completion in completions]) - if are_sensitive_events_enabled() - else None - ) - - _set_completion_response( - span, - completion_text, - None, - response_id or "unknown", - prompt_tokens, - completion_tokens, - ) - - return completions - - return wrapper_decorator - - return inner_trace_text_completion - - -def _start_completion_activity( - operation_name: str, - model_name: str, - model_provider: str, 
- prompt: str | None, - execution_settings: PromptExecutionSettings | None, -) -> Span | None: - """Start a text or chat completion activity for a given model.""" - if not are_model_diagnostics_enabled(): - return None - - span = tracer.start_span(f"{operation_name} {model_name}") - - # Set attributes on the span - span.set_attributes( - { - OPERATION: operation_name, - SYSTEM: model_provider, - MODEL: model_name, - } - ) - - # TODO(@glahaye): we'll need to have a way to get these attributes from model - # providers other than OpenAI (for example if the attributes are named differently) - if execution_settings: - attribute = execution_settings.extension_data.get("max_tokens") - if attribute: - span.set_attribute(MAX_TOKENS, attribute) - - attribute = execution_settings.extension_data.get("temperature") - if attribute: - span.set_attribute(TEMPERATURE, attribute) - - attribute = execution_settings.extension_data.get("top_p") - if attribute: - span.set_attribute(TOP_P, attribute) - - if are_sensitive_events_enabled() and prompt: - span.add_event(PROMPT_EVENT, {PROMPT_EVENT_PROMPT: prompt}) - - return span - - -def _set_completion_response( - span: Span, - completion_text: str | None, - finish_reasons: list[str] | None, - response_id: str, - prompt_tokens: int | None = None, - completion_tokens: int | None = None, -) -> None: - """Set the a text or chat completion response for a given activity.""" - if not are_model_diagnostics_enabled(): - return - - span.set_attribute(RESPONSE_ID, response_id) - - if finish_reasons: - span.set_attribute(FINISH_REASON, ",".join(finish_reasons)) - - if prompt_tokens: - span.set_attribute(PROMPT_TOKENS, prompt_tokens) - - if completion_tokens: - span.set_attribute(COMPLETION_TOKENS, completion_tokens) - - if are_sensitive_events_enabled() and completion_text: - span.add_event(COMPLETION_EVENT, {COMPLETION_EVENT_COMPLETION: completion_text}) - - -def _set_completion_error(span: Span, error: Exception) -> None: - """Set an error for a text or chat completion .""" - if not are_model_diagnostics_enabled(): - return - - span.set_attribute(ERROR_TYPE, str(type(error))) - - span.set_status(StatusCode.ERROR, repr(error)) - - -def _messages_to_openai_format(messages: list[ChatMessageContent]) -> str: - """Convert a list of ChatMessageContent to a string in the OpenAI format. - - OpenTelemetry recommends formatting the messages in the OpenAI format - regardless of the actual model being used. - """ - return json.dumps([message.to_dict() for message in messages]) diff --git a/python/semantic_kernel/utils/telemetry/model_diagnostics/__init__.py b/python/semantic_kernel/utils/telemetry/model_diagnostics/__init__.py new file mode 100644 index 000000000000..c873a5770a80 --- /dev/null +++ b/python/semantic_kernel/utils/telemetry/model_diagnostics/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) Microsoft. All rights reserved. + +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_chat_completion, trace_text_completion + +__all__ = [ + "trace_chat_completion", + "trace_text_completion", +] diff --git a/python/semantic_kernel/utils/telemetry/model_diagnostics/decorators.py b/python/semantic_kernel/utils/telemetry/model_diagnostics/decorators.py new file mode 100644 index 000000000000..b3dd0faf5f82 --- /dev/null +++ b/python/semantic_kernel/utils/telemetry/model_diagnostics/decorators.py @@ -0,0 +1,232 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +import functools +import json +from collections.abc import Callable +from typing import Any + +from opentelemetry.trace import Span, StatusCode, get_tracer, use_span + +from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase +from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings +from semantic_kernel.connectors.ai.text_completion_client_base import TextCompletionClientBase +from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.text_content import TextContent +from semantic_kernel.utils.experimental_decorator import experimental_function +from semantic_kernel.utils.telemetry.model_diagnostics import gen_ai_attributes +from semantic_kernel.utils.telemetry.model_diagnostics.model_diagnostics_settings import ModelDiagnosticSettings + +# Module to instrument GenAI models using OpenTelemetry and OpenTelemetry Semantic Conventions. +# These are experimental features and may change in the future. + +# To enable these features, set one of the following environment variables to true: +# SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS +# SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE +MODEL_DIAGNOSTICS_SETTINGS = ModelDiagnosticSettings.create() + +# Operation names +CHAT_COMPLETION_OPERATION = "chat.completions" +TEXT_COMPLETION_OPERATION = "text.completions" + +# Creates a tracer from the global tracer provider +tracer = get_tracer(__name__) + + +@experimental_function +def are_model_diagnostics_enabled() -> bool: + """Check if model diagnostics are enabled. + + Model diagnostics are enabled if either diagnostic is enabled or diagnostic with sensitive events is enabled. + """ + return ( + MODEL_DIAGNOSTICS_SETTINGS.enable_otel_diagnostics + or MODEL_DIAGNOSTICS_SETTINGS.enable_otel_diagnostics_sensitive + ) + + +@experimental_function +def are_sensitive_events_enabled() -> bool: + """Check if sensitive events are enabled. + + Sensitive events are enabled if the diagnostic with sensitive events is enabled. + """ + return MODEL_DIAGNOSTICS_SETTINGS.enable_otel_diagnostics_sensitive + + +@experimental_function +def trace_chat_completion(model_provider: str) -> Callable: + """Decorator to trace chat completion activities. + + Args: + model_provider (str): The model provider should describe a family of + GenAI models with specific model identified by ai_model_id. For example, + model_provider could be "openai" and ai_model_id could be "gpt-3.5-turbo". + Sometimes the model provider is unknown at runtime, in which case it can be + set to the most specific known provider. For example, while using local models + hosted by Ollama, the model provider could be set to "ollama". 
+ """ + + def inner_trace_chat_completion(completion_func: Callable) -> Callable: + @functools.wraps(completion_func) + async def wrapper_decorator(*args: Any, **kwargs: Any) -> list[ChatMessageContent]: + if not are_model_diagnostics_enabled(): + # If model diagnostics are not enabled, just return the completion + return await completion_func(*args, **kwargs) + + completion_service: ChatCompletionClientBase = args[0] + chat_history: ChatHistory = kwargs["chat_history"] + settings: PromptExecutionSettings = kwargs["settings"] + + with use_span( + _start_completion_activity( + CHAT_COMPLETION_OPERATION, + completion_service.ai_model_id, + model_provider, + chat_history, + settings, + ), + end_on_exit=True, + ) as current_span: + try: + completions: list[ChatMessageContent] = await completion_func(*args, **kwargs) + _set_completion_response(current_span, completions) + return completions + except Exception as exception: + _set_completion_error(current_span, exception) + raise + + return wrapper_decorator + + return inner_trace_chat_completion + + +@experimental_function +def trace_text_completion(model_provider: str) -> Callable: + """Decorator to trace text completion activities.""" + + def inner_trace_text_completion(completion_func: Callable) -> Callable: + @functools.wraps(completion_func) + async def wrapper_decorator(*args: Any, **kwargs: Any) -> list[TextContent]: + if not are_model_diagnostics_enabled(): + # If model diagnostics are not enabled, just return the completion + return await completion_func(*args, **kwargs) + + completion_service: TextCompletionClientBase = args[0] + prompt: str = kwargs["prompt"] + settings: PromptExecutionSettings = kwargs["settings"] + + with use_span( + _start_completion_activity( + TEXT_COMPLETION_OPERATION, + completion_service.ai_model_id, + model_provider, + prompt, + settings, + ), + end_on_exit=True, + ) as current_span: + try: + completions: list[TextContent] = await completion_func(*args, **kwargs) + _set_completion_response(current_span, completions) + return completions + except Exception as exception: + _set_completion_error(current_span, exception) + raise + + return wrapper_decorator + + return inner_trace_text_completion + + +def _start_completion_activity( + operation_name: str, + model_name: str, + model_provider: str, + prompt: str | ChatHistory, + execution_settings: PromptExecutionSettings | None, +) -> Span: + """Start a text or chat completion activity for a given model.""" + span = tracer.start_span(f"{operation_name} {model_name}") + + # Set attributes on the span + span.set_attributes({ + gen_ai_attributes.OPERATION: operation_name, + gen_ai_attributes.SYSTEM: model_provider, + gen_ai_attributes.MODEL: model_name, + }) + + # TODO(@glahaye): we'll need to have a way to get these attributes from model + # providers other than OpenAI (for example if the attributes are named differently) + if execution_settings: + attribute = execution_settings.extension_data.get("max_tokens") + if attribute: + span.set_attribute(gen_ai_attributes.MAX_TOKENS, attribute) + + attribute = execution_settings.extension_data.get("temperature") + if attribute: + span.set_attribute(gen_ai_attributes.TEMPERATURE, attribute) + + attribute = execution_settings.extension_data.get("top_p") + if attribute: + span.set_attribute(gen_ai_attributes.TOP_P, attribute) + + if are_sensitive_events_enabled(): + if isinstance(prompt, ChatHistory): + prompt = _messages_to_openai_format(prompt.messages) + span.add_event(gen_ai_attributes.PROMPT_EVENT, 
{gen_ai_attributes.PROMPT_EVENT_PROMPT: prompt}) + + return span + + +def _set_completion_response( + current_span: Span, + completions: list[ChatMessageContent] | list[TextContent], +) -> None: + """Set a text or chat completion response for a given activity.""" + first_completion = completions[0] + + # Set the response ID + response_id = first_completion.metadata.get("id") or (first_completion.inner_content or {}).get("id") + if response_id: + current_span.set_attribute(gen_ai_attributes.RESPONSE_ID, response_id) + + # Set the finish reason + finish_reasons = [ + str(completion.finish_reason) for completion in completions if isinstance(completion, ChatMessageContent) + ] + if finish_reasons: + current_span.set_attribute(gen_ai_attributes.FINISH_REASON, ",".join(finish_reasons)) + + # Set usage attributes + usage = first_completion.metadata.get("usage", None) + + prompt_tokens = getattr(usage, "prompt_tokens", None) + if prompt_tokens: + current_span.set_attribute(gen_ai_attributes.PROMPT_TOKENS, prompt_tokens) + + completion_tokens = getattr(usage, "completion_tokens", None) + if completion_tokens: + current_span.set_attribute(gen_ai_attributes.COMPLETION_TOKENS, completion_tokens) + + # Set the completion event + if are_sensitive_events_enabled(): + completion_text: str = _messages_to_openai_format(completions) + current_span.add_event( + gen_ai_attributes.COMPLETION_EVENT, {gen_ai_attributes.COMPLETION_EVENT_COMPLETION: completion_text} + ) + + +def _set_completion_error(span: Span, error: Exception) -> None: + """Set an error for a text or chat completion.""" + span.set_attribute(gen_ai_attributes.ERROR_TYPE, str(type(error))) + span.set_status(StatusCode.ERROR, repr(error)) + + +def _messages_to_openai_format(messages: list[ChatMessageContent] | list[TextContent]) -> str: + """Convert a list of ChatMessageContent or TextContent to a string in the OpenAI format. + + OpenTelemetry recommends formatting the messages in the OpenAI format + regardless of the actual model being used. + """ + return json.dumps([message.to_dict() for message in messages]) diff --git a/python/semantic_kernel/utils/telemetry/const.py b/python/semantic_kernel/utils/telemetry/model_diagnostics/gen_ai_attributes.py similarity index 83% rename from python/semantic_kernel/utils/telemetry/const.py rename to python/semantic_kernel/utils/telemetry/model_diagnostics/gen_ai_attributes.py index 5c74f708b986..cca37908e466 100644 --- a/python/semantic_kernel/utils/telemetry/const.py +++ b/python/semantic_kernel/utils/telemetry/model_diagnostics/gen_ai_attributes.py @@ -1,12 +1,13 @@ # Copyright (c) Microsoft. All rights reserved. -# + # Constants for tracing activities with semantic conventions. +# Ideally, we should use the attributes from the semconv package. +# However, many of the attributes are not yet available in the package, +# so we define them here for now. 
# Activity tags SYSTEM = "gen_ai.system" OPERATION = "gen_ai.operation.name" -CHAT_COMPLETION_OPERATION = "chat.completions" -TEXT_COMPLETION_OPERATION = "text.completions" MODEL = "gen_ai.request.model" MAX_TOKENS = "gen_ai.request.max_tokens" # nosec TEMPERATURE = "gen_ai.request.temperature" diff --git a/python/semantic_kernel/utils/telemetry/model_diagnostics/model_diagnostics_settings.py b/python/semantic_kernel/utils/telemetry/model_diagnostics/model_diagnostics_settings.py new file mode 100644 index 000000000000..f7e509a21b26 --- /dev/null +++ b/python/semantic_kernel/utils/telemetry/model_diagnostics/model_diagnostics_settings.py @@ -0,0 +1,31 @@ +# Copyright (c) Microsoft. All rights reserved. + +from typing import ClassVar + +from semantic_kernel.kernel_pydantic import KernelBaseSettings +from semantic_kernel.utils.experimental_decorator import experimental_class + + +@experimental_class +class ModelDiagnosticSettings(KernelBaseSettings): + """Settings for model diagnostics. + + The settings are first loaded from environment variables with + the prefix 'SEMANTICKERNEL_EXPERIMENTAL_GENAI_'. + If the environment variables are not found, the settings can + be loaded from a .env file with the encoding 'utf-8'. + If the settings are not found in the .env file, the settings + are ignored and the default values + are used. + + Optional settings for prefix 'SEMANTICKERNEL_EXPERIMENTAL_GENAI_' are: + - enable_otel_diagnostics: bool - Enable OpenTelemetry diagnostics. Default is False. + (Env var SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS) + - enable_otel_diagnostics_sensitive: bool - Enable OpenTelemetry sensitive events. Default is False. + (Env var SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE) + """ + + env_prefix: ClassVar[str] = "SEMANTICKERNEL_EXPERIMENTAL_GENAI_" + + enable_otel_diagnostics: bool = False + enable_otel_diagnostics_sensitive: bool = False diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 692c9c759ab1..3d8d263e7a45 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -161,6 +161,11 @@ def chat_history() -> "ChatHistory": return ChatHistory() +@fixture(scope="function") +def prompt() -> str: + return "test prompt" + + # @fixture(autouse=True) # def enable_debug_mode(): # """Set `autouse=True` to enable easy debugging for tests. 
@@ -306,10 +311,7 @@ def anthropic_unit_test_env(monkeypatch, exclude_list, override_env_param_dict):
     if override_env_param_dict is None:
         override_env_param_dict = {}

-    env_vars = {
-        "ANTHROPIC_CHAT_MODEL_ID": "test_chat_model_id",
-        "ANTHROPIC_API_KEY": "test_api_key"
-    }
+    env_vars = {"ANTHROPIC_CHAT_MODEL_ID": "test_chat_model_id", "ANTHROPIC_API_KEY": "test_api_key"}

     env_vars.update(override_env_param_dict)

diff --git a/python/tests/unit/connectors/mistral_ai/services/test_mistralai_text_embeddings.py b/python/tests/unit/connectors/mistral_ai/services/test_mistralai_text_embeddings.py
index 98550ca6f1ad..61c960b4810f 100644
--- a/python/tests/unit/connectors/mistral_ai/services/test_mistralai_text_embeddings.py
+++ b/python/tests/unit/connectors/mistral_ai/services/test_mistralai_text_embeddings.py
@@ -13,7 +13,7 @@ def test_embedding_with_env_variables(mistralai_unit_test_env):
     text_embedding = MistralAITextEmbedding()

     assert text_embedding.ai_model_id == "test_embedding_model_id"
-    assert text_embedding.client._api_key == "test_api_key"
+    assert text_embedding.async_client._api_key == "test_api_key"


 @pytest.mark.parametrize("exclude_list", [["MISTRALAI_API_KEY", "MISTRALAI_EMBEDDING_MODEL_ID"]], indirect=True)
@@ -23,33 +23,33 @@ def test_embedding_with_constructor(mistralai_unit_test_env):
         ai_model_id="overwrite-model",
     )

     assert text_embedding.ai_model_id == "overwrite-model"
-    assert text_embedding.client._api_key == "overwrite-api-key"
+    assert text_embedding.async_client._api_key == "overwrite-api-key"


 def test_embedding_with_client(mistralai_unit_test_env):
     client = MagicMock(spec=MistralAsyncClient)
-    text_embedding = MistralAITextEmbedding(client=client)
-    assert text_embedding.client == client
+    text_embedding = MistralAITextEmbedding(async_client=client)
+    assert text_embedding.async_client == client
     assert text_embedding.ai_model_id == "test_embedding_model_id"


 def test_embedding_with_api_key(mistralai_unit_test_env):
     text_embedding = MistralAITextEmbedding(api_key="overwrite-api-key")
-    assert text_embedding.client._api_key == "overwrite-api-key"
+    assert text_embedding.async_client._api_key == "overwrite-api-key"
     assert text_embedding.ai_model_id == "test_embedding_model_id"


 def test_embedding_with_model(mistralai_unit_test_env):
     text_embedding = MistralAITextEmbedding(ai_model_id="overwrite-model")
     assert text_embedding.ai_model_id == "overwrite-model"
-    assert text_embedding.client._api_key == "test_api_key"
+    assert text_embedding.async_client._api_key == "test_api_key"


-@pytest.mark.parametrize("exclude_list", [["MISTRALAI_EMBEDDING_MODEL_ID"]], indirect=True) 
+@pytest.mark.parametrize("exclude_list", [["MISTRALAI_EMBEDDING_MODEL_ID"]], indirect=True)
 def test_embedding_with_model_without_env(mistralai_unit_test_env):
     text_embedding = MistralAITextEmbedding(ai_model_id="overwrite-model")
     assert text_embedding.ai_model_id == "overwrite-model"
-    assert text_embedding.client._api_key == "test_api_key"
+    assert text_embedding.async_client._api_key == "test_api_key"


 @pytest.mark.parametrize("exclude_list", [["MISTRALAI_EMBEDDING_MODEL_ID"]], indirect=True)
@@ -90,7 +90,7 @@ async def test_embedding_generate_raw_embedding(mistralai_unit_test_env):
     mock_client = AsyncMock(spec=MistralAsyncClient)
     mock_embedding_response = MagicMock(spec=EmbeddingResponse, data=[MagicMock(embedding=[1, 2, 3, 4, 5])])
     mock_client.embeddings.return_value = mock_embedding_response
-    text_embedding = MistralAITextEmbedding(client=mock_client)
+    text_embedding = MistralAITextEmbedding(async_client=mock_client)

     embedding = await text_embedding.generate_raw_embeddings(["test"])
     assert embedding == [[1, 2, 3, 4, 5]]
@@ -100,7 +100,7 @@ async def test_embedding_generate_embedding(mistralai_unit_test_env):
     mock_client = AsyncMock(spec=MistralAsyncClient)
     mock_embedding_response = MagicMock(spec=EmbeddingResponse, data=[MagicMock(embedding=[1, 2, 3, 4, 5])])
     mock_client.embeddings.return_value = mock_embedding_response
-    text_embedding = MistralAITextEmbedding(client=mock_client)
+    text_embedding = MistralAITextEmbedding(async_client=mock_client)

     embedding = await text_embedding.generate_embeddings(["test"])
     assert embedding.tolist() == [[1, 2, 3, 4, 5]]
@@ -109,6 +109,6 @@ async def test_embedding_generate_embedding_exception(mistralai_unit_test_env):
     mock_client = AsyncMock(spec=MistralAsyncClient)
     mock_client.embeddings.side_effect = Exception("Test Exception")
-    text_embedding = MistralAITextEmbedding(client=mock_client)
+    text_embedding = MistralAITextEmbedding(async_client=mock_client)

     with pytest.raises(ServiceResponseException):
         await text_embedding.generate_embeddings(["test"])
diff --git a/python/tests/unit/utils/model_diagnostics/conftest.py b/python/tests/unit/utils/model_diagnostics/conftest.py
new file mode 100644
index 000000000000..ab7528af2bad
--- /dev/null
+++ b/python/tests/unit/utils/model_diagnostics/conftest.py
@@ -0,0 +1,91 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+
+import sys
+from collections.abc import AsyncGenerator
+from typing import Any, ClassVar
+
+import pytest
+
+if sys.version_info >= (3, 12):
+    from typing import override  # pragma: no cover
+else:
+    from typing_extensions import override  # pragma: no cover
+
+import semantic_kernel
+from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
+from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+from semantic_kernel.connectors.ai.text_completion_client_base import TextCompletionClientBase
+from semantic_kernel.contents.chat_history import ChatHistory
+from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.streaming_text_content import StreamingTextContent
+from semantic_kernel.contents.text_content import TextContent
+from semantic_kernel.utils.telemetry.model_diagnostics.model_diagnostics_settings import ModelDiagnosticSettings
+
+
+@pytest.fixture()
+def model_diagnostics_unit_test_env(monkeypatch):
+    """Fixture to set environment variables for Model Diagnostics Unit Tests."""
+    env_vars = {
+        "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS": "true",
+        "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE": "true",
+    }
+
+    for key, value in env_vars.items():
+        monkeypatch.setenv(key, value)
+
+    # Need to reload the settings to pick up the new environment variables since the
+    # settings are loaded at import time and this fixture is called after the import
+    semantic_kernel.utils.telemetry.model_diagnostics.decorators.MODEL_DIAGNOSTICS_SETTINGS = (
+        ModelDiagnosticSettings.create()
+    )
+
+
+@pytest.fixture()
+def service_env_vars(monkeypatch, request):
+    """Fixture to set environment variables for AI Service Unit Tests."""
+    for key, value in request.param.items():
+        monkeypatch.setenv(key, value)
+
+
+class MockChatCompletion(ChatCompletionClientBase):
+    MODEL_PROVIDER_NAME: ClassVar[str] = "mock"
+
+    @override
+    async def get_chat_message_contents(
+        self,
+        chat_history: "ChatHistory",
+        settings: "PromptExecutionSettings",
+        **kwargs: Any,
+    ) -> list["ChatMessageContent"]:
+        return []
+
+    @override
+    async def get_streaming_chat_message_contents(
+        self,
+        chat_history: "ChatHistory",
+        settings: "PromptExecutionSettings",
+        **kwargs: Any,
+    ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]:
+        yield []
+
+
+class MockTextCompletion(TextCompletionClientBase):
+    MODEL_PROVIDER_NAME: ClassVar[str] = "mock"
+
+    @override
+    async def get_text_contents(
+        self,
+        prompt: str,
+        settings: "PromptExecutionSettings",
+    ) -> list["TextContent"]:
+        return []
+
+    @override
+    async def get_streaming_text_contents(
+        self,
+        prompt: str,
+        settings: "PromptExecutionSettings",
+    ) -> AsyncGenerator[list["StreamingTextContent"], Any]:
+        yield []
diff --git a/python/tests/unit/utils/model_diagnostics/test_trace_chat_completion.py b/python/tests/unit/utils/model_diagnostics/test_trace_chat_completion.py
new file mode 100644
index 000000000000..95de327818e7
--- /dev/null
+++ b/python/tests/unit/utils/model_diagnostics/test_trace_chat_completion.py
@@ -0,0 +1,172 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+from unittest.mock import patch
+
+import pytest
+from opentelemetry.trace import StatusCode
+
+from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
+from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
+from semantic_kernel.contents.utils.finish_reason import FinishReason
+from semantic_kernel.exceptions.service_exceptions import ServiceResponseException
+from semantic_kernel.utils.telemetry.model_diagnostics import gen_ai_attributes
+from semantic_kernel.utils.telemetry.model_diagnostics.decorators import (
+    CHAT_COMPLETION_OPERATION,
+    _messages_to_openai_format,
+    trace_chat_completion,
+)
+from tests.unit.utils.model_diagnostics.conftest import MockChatCompletion
+
+pytestmark = pytest.mark.parametrize(
+    "execution_settings, mock_response",
+    [
+        pytest.param(
+            PromptExecutionSettings(
+                extension_data={
+                    "max_tokens": 1000,
+                    "temperature": 0.5,
+                    "top_p": 0.9,
+                }
+            ),
+            [
+                ChatMessageContent(
+                    role=AuthorRole.ASSISTANT,
+                    ai_model_id="ai_model_id",
+                    content="Test content",
+                    metadata={"id": "test_id"},
+                    finish_reason=FinishReason.STOP,
+                )
+            ],
+            id="test_execution_settings_with_extension_data",
+        ),
+        pytest.param(
+            PromptExecutionSettings(),
+            [
+                ChatMessageContent(
+                    role=AuthorRole.ASSISTANT,
+                    ai_model_id="ai_model_id",
+                    metadata={"id": "test_id"},
+                    finish_reason=FinishReason.STOP,
+                )
+            ],
+            id="test_execution_settings_no_extension_data",
+        ),
+        pytest.param(
+            PromptExecutionSettings(),
+            [
+                ChatMessageContent(
+                    role=AuthorRole.ASSISTANT,
+                    ai_model_id="ai_model_id",
+                    metadata={},
+                    finish_reason=FinishReason.STOP,
+                )
+            ],
+            id="test_chat_message_content_no_metadata",
+        ),
+        pytest.param(
+            PromptExecutionSettings(),
+            [
+                ChatMessageContent(
+                    role=AuthorRole.ASSISTANT,
+                    ai_model_id="ai_model_id",
+                    metadata={"id": "test_id"},
+                )
+            ],
+            id="test_chat_message_content_no_finish_reason",
+        ),
+    ],
+)
+
+
+@pytest.mark.asyncio
+@patch("opentelemetry.trace.INVALID_SPAN")  # When no tracer provider is available, the span will be an INVALID_SPAN
+async def test_trace_chat_completion(
+    mock_span,
+    execution_settings,
+    mock_response,
+    chat_history,
+    model_diagnostics_unit_test_env,
+):
+    # Setup
+    chat_completion: ChatCompletionClientBase = MockChatCompletion(ai_model_id="ai_model_id")
+
+    with patch.object(MockChatCompletion, "get_chat_message_contents", return_value=mock_response):
+        # We need to reapply the decorator to the method since the mock will not have the decorator applied
+        MockChatCompletion.get_chat_message_contents = trace_chat_completion(MockChatCompletion.MODEL_PROVIDER_NAME)(
+            chat_completion.get_chat_message_contents
+        )
+
+        results: list[ChatMessageContent] = await chat_completion.get_chat_message_contents(
+            chat_history=chat_history, settings=execution_settings
+        )
+
+        assert results == mock_response
+
+        # Before the call to the model
+        mock_span.set_attributes.assert_called_with({
+            gen_ai_attributes.OPERATION: CHAT_COMPLETION_OPERATION,
+            gen_ai_attributes.SYSTEM: MockChatCompletion.MODEL_PROVIDER_NAME,
+            gen_ai_attributes.MODEL: chat_completion.ai_model_id,
+        })
+
+        # Not all connectors take the same parameters
+        if execution_settings.extension_data.get("max_tokens") is not None:
+            mock_span.set_attribute.assert_any_call(
+                gen_ai_attributes.MAX_TOKENS, execution_settings.extension_data["max_tokens"]
+            )
+        if execution_settings.extension_data.get("temperature") is not None:
+            mock_span.set_attribute.assert_any_call(
+                gen_ai_attributes.TEMPERATURE, execution_settings.extension_data["temperature"]
+            )
+        if execution_settings.extension_data.get("top_p") is not None:
+            mock_span.set_attribute.assert_any_call(gen_ai_attributes.TOP_P, execution_settings.extension_data["top_p"])
+
+        mock_span.add_event.assert_any_call(
+            gen_ai_attributes.PROMPT_EVENT,
+            {gen_ai_attributes.PROMPT_EVENT_PROMPT: _messages_to_openai_format(chat_history)},
+        )
+
+        # After the call to the model
+        # Not all connectors return the same metadata
+        if mock_response[0].metadata.get("id") is not None:
+            mock_span.set_attribute.assert_any_call(gen_ai_attributes.RESPONSE_ID, mock_response[0].metadata["id"])
+        if any(completion.finish_reason is not None for completion in mock_response):
+            mock_span.set_attribute.assert_any_call(
+                gen_ai_attributes.FINISH_REASON,
+                ",".join([str(completion.finish_reason) for completion in mock_response]),
+            )
+
+        mock_span.add_event.assert_any_call(
+            gen_ai_attributes.COMPLETION_EVENT,
+            {gen_ai_attributes.COMPLETION_EVENT_COMPLETION: _messages_to_openai_format(mock_response)},
+        )
+
+
+@pytest.mark.asyncio
+@patch("opentelemetry.trace.INVALID_SPAN")  # When no tracer provider is available, the span will be an INVALID_SPAN
+async def test_trace_chat_completion_exception(
+    mock_span,
+    execution_settings,
+    mock_response,
+    chat_history,
+    model_diagnostics_unit_test_env,
+):
+    # Setup
+    chat_completion: ChatCompletionClientBase = MockChatCompletion(ai_model_id="ai_model_id")
+
+    with patch.object(MockChatCompletion, "get_chat_message_contents", side_effect=ServiceResponseException()):
+        # We need to reapply the decorator to the method since the mock will not have the decorator applied
+        MockChatCompletion.get_chat_message_contents = trace_chat_completion(MockChatCompletion.MODEL_PROVIDER_NAME)(
+            chat_completion.get_chat_message_contents
+        )
+
+        with pytest.raises(ServiceResponseException):
+            await chat_completion.get_chat_message_contents(chat_history=chat_history, settings=execution_settings)
+
+        exception = ServiceResponseException()
+        mock_span.set_attribute.assert_any_call(gen_ai_attributes.ERROR_TYPE, str(type(exception)))
+        mock_span.set_status.assert_any_call(StatusCode.ERROR, repr(exception))
+
+        mock_span.end.assert_any_call()
diff --git a/python/tests/unit/utils/model_diagnostics/test_trace_text_completion.py b/python/tests/unit/utils/model_diagnostics/test_trace_text_completion.py
new file mode 100644
index 000000000000..f6b4d47e1b97
--- /dev/null
+++ b/python/tests/unit/utils/model_diagnostics/test_trace_text_completion.py
@@ -0,0 +1,150 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+from unittest.mock import patch
+
+import pytest
+from opentelemetry.trace import StatusCode
+
+from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+from semantic_kernel.connectors.ai.text_completion_client_base import TextCompletionClientBase
+from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.text_content import TextContent
+from semantic_kernel.exceptions.service_exceptions import ServiceResponseException
+from semantic_kernel.utils.telemetry.model_diagnostics import gen_ai_attributes
+from semantic_kernel.utils.telemetry.model_diagnostics.decorators import (
+    TEXT_COMPLETION_OPERATION,
+    _messages_to_openai_format,
+    trace_text_completion,
+)
+from tests.unit.utils.model_diagnostics.conftest import MockTextCompletion
+
+pytestmark = pytest.mark.parametrize(
+    "execution_settings, mock_response",
+    [
+        pytest.param(
+            PromptExecutionSettings(
+                extension_data={
+                    "max_tokens": 1000,
+                    "temperature": 0.5,
+                    "top_p": 0.9,
+                }
+            ),
+            [
+                TextContent(
+                    ai_model_id="ai_model_id",
+                    text="Test content",
+                    metadata={"id": "test_id"},
+                )
+            ],
+            id="test_execution_settings_with_extension_data",
+        ),
+        pytest.param(
+            PromptExecutionSettings(),
+            [
+                TextContent(
+                    ai_model_id="ai_model_id",
+                    text="Test content",
+                    metadata={"id": "test_id"},
+                )
+            ],
+            id="test_execution_settings_no_extension_data",
+        ),
+        pytest.param(
+            PromptExecutionSettings(),
+            [
+                TextContent(
+                    ai_model_id="ai_model_id",
+                    text="Test content",
+                    metadata={},
+                )
+            ],
+            id="test_text_content_no_metadata",
+        ),
+    ],
+)
+
+
+@pytest.mark.asyncio
+@patch("opentelemetry.trace.INVALID_SPAN")  # When no tracer provider is available, the span will be an INVALID_SPAN
+async def test_trace_text_completion(
+    mock_span,
+    execution_settings,
+    mock_response,
+    prompt,
+    model_diagnostics_unit_test_env,
+):
+    # Setup
+    text_completion: TextCompletionClientBase = MockTextCompletion(ai_model_id="ai_model_id")
+
+    with patch.object(MockTextCompletion, "get_text_contents", return_value=mock_response):
+        # We need to reapply the decorator to the method since the mock will not have the decorator applied
+        MockTextCompletion.get_text_contents = trace_text_completion(MockTextCompletion.MODEL_PROVIDER_NAME)(
+            text_completion.get_text_contents
+        )
+
+        results: list[ChatMessageContent] = await text_completion.get_text_contents(
+            prompt=prompt, settings=execution_settings
+        )
+
+        assert results == mock_response
+
+        # Before the call to the model
+        mock_span.set_attributes.assert_called_with({
+            gen_ai_attributes.OPERATION: TEXT_COMPLETION_OPERATION,
+            gen_ai_attributes.SYSTEM: MockTextCompletion.MODEL_PROVIDER_NAME,
+            gen_ai_attributes.MODEL: text_completion.ai_model_id,
+        })
+
+        # Not all connectors take the same parameters
+        if execution_settings.extension_data.get("max_tokens") is not None:
+            mock_span.set_attribute.assert_any_call(
+                gen_ai_attributes.MAX_TOKENS, execution_settings.extension_data["max_tokens"]
+            )
+        if execution_settings.extension_data.get("temperature") is not None:
+            mock_span.set_attribute.assert_any_call(
+                gen_ai_attributes.TEMPERATURE, execution_settings.extension_data["temperature"]
+            )
+        if execution_settings.extension_data.get("top_p") is not None:
+            mock_span.set_attribute.assert_any_call(gen_ai_attributes.TOP_P, execution_settings.extension_data["top_p"])
+
+        mock_span.add_event.assert_any_call(
+            gen_ai_attributes.PROMPT_EVENT, {gen_ai_attributes.PROMPT_EVENT_PROMPT: prompt}
+        )
+
+        # After the call to the model
+        # Not all connectors return the same metadata
+        if mock_response[0].metadata.get("id") is not None:
+            mock_span.set_attribute.assert_any_call(gen_ai_attributes.RESPONSE_ID, mock_response[0].metadata["id"])
+
+        mock_span.add_event.assert_any_call(
+            gen_ai_attributes.COMPLETION_EVENT,
+            {gen_ai_attributes.COMPLETION_EVENT_COMPLETION: _messages_to_openai_format(mock_response)},
+        )
+
+
+@pytest.mark.asyncio
+@patch("opentelemetry.trace.INVALID_SPAN")  # When no tracer provider is available, the span will be an INVALID_SPAN
+async def test_trace_text_completion_exception(
+    mock_span,
+    execution_settings,
+    mock_response,
+    prompt,
+    model_diagnostics_unit_test_env,
+):
+    # Setup
+    text_completion: TextCompletionClientBase = MockTextCompletion(ai_model_id="ai_model_id")
+
+    with patch.object(MockTextCompletion, "get_text_contents", side_effect=ServiceResponseException()):
+        # We need to reapply the decorator to the method since the mock will not have the decorator applied
+        MockTextCompletion.get_text_contents = trace_text_completion(MockTextCompletion.MODEL_PROVIDER_NAME)(
+            text_completion.get_text_contents
+        )
+
+        with pytest.raises(ServiceResponseException):
+            await text_completion.get_text_contents(prompt=prompt, settings=execution_settings)
+
+        exception = ServiceResponseException()
+        mock_span.set_attribute.assert_any_call(gen_ai_attributes.ERROR_TYPE, str(type(exception)))
+        mock_span.set_status.assert_any_call(StatusCode.ERROR, repr(exception))
+
+        mock_span.end.assert_any_call()
diff --git a/python/tests/unit/utils/test_tracing.py b/python/tests/unit/utils/test_tracing.py
deleted file mode 100644
index 5d2c2f9e4bf6..000000000000
--- a/python/tests/unit/utils/test_tracing.py
+++ /dev/null
@@ -1,241 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
-
-from unittest.mock import patch
-
-import pytest
-from openai.types import Completion as TextCompletion
-from openai.types import CompletionChoice
-from opentelemetry.trace import StatusCode
-
-from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion import OpenAIChatCompletion
-from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base import OpenAIChatCompletionBase
-from semantic_kernel.connectors.ai.open_ai.services.open_ai_text_completion import OpenAITextCompletion
-from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
-from semantic_kernel.contents.chat_history import ChatHistory
-from semantic_kernel.contents.chat_message_content import ChatMessageContent
-from semantic_kernel.contents.text_content import TextContent
-from semantic_kernel.contents.utils.author_role import AuthorRole
-from semantic_kernel.contents.utils.finish_reason import FinishReason
-from semantic_kernel.exceptions.service_exceptions import ServiceResponseException
-from semantic_kernel.utils.telemetry.const import (
-    CHAT_COMPLETION_OPERATION,
-    COMPLETION_EVENT,
-    COMPLETION_EVENT_COMPLETION,
-    ERROR_TYPE,
-    FINISH_REASON,
-    MAX_TOKENS,
-    MODEL,
-    OPERATION,
-    PROMPT_EVENT,
-    PROMPT_EVENT_PROMPT,
-    RESPONSE_ID,
-    SYSTEM,
-    TEMPERATURE,
-    TEXT_COMPLETION_OPERATION,
-    TOP_P,
-)
-
-TEST_CONTENT = "Test content"
-TEST_RESPONSE_ID = "dummy_id"
-TEST_MAX_TOKENS = "1000"
-TEST_MODEL = "dummy_model"
-TEST_TEMPERATURE = "0.5"
-TEST_TOP_P = "0.9"
-TEST_CREATED_AT = 1
-TEST_TEXT_PROMPT = "Test prompt"
-EXPECTED_CHAT_COMPLETION_EVENT_PAYLOAD = f'[{{"role": "assistant", "content": "{TEST_CONTENT}"}}]'
-EXPECTED_TEXT_COMPLETION_EVENT_PAYLOAD = f'["{TEST_CONTENT}"]'
-
-TEST_CHAT_RESPONSE = [
-    ChatMessageContent(
-        role=AuthorRole.ASSISTANT,
-        ai_model_id=TEST_MODEL,
-        content=TEST_CONTENT,
-        metadata={"id": TEST_RESPONSE_ID},
-        finish_reason=FinishReason.STOP,
-    )
-]
-
-TEST_TEXT_RESPONSE = TextCompletion(
-    model=TEST_MODEL,
-    text=TEST_CONTENT,
-    id=TEST_RESPONSE_ID,
-    choices=[CompletionChoice(index=0, text=TEST_CONTENT, finish_reason="stop")],
-    created=TEST_CREATED_AT,
-    object="text_completion",
-)
-
-TEST_TEXT_RESPONSE_METADATA = {
-    "id": TEST_RESPONSE_ID,
-    "created": TEST_CREATED_AT,
-    "system_fingerprint": None,
-    "logprobs": None,
-    "usage": None,
-}
-
-EXPECTED_TEXT_CONTENT = [
-    TextContent(
-        ai_model_id=TEST_MODEL,
-        text=TEST_CONTENT,
-        encoding=None,
-        metadata=TEST_TEXT_RESPONSE_METADATA,
-        inner_content=TEST_TEXT_RESPONSE,
-    )
-]
-
-
-@pytest.mark.asyncio
-@patch("semantic_kernel.utils.telemetry.decorators.are_model_diagnostics_enabled", return_value=True)
-@patch("semantic_kernel.utils.telemetry.decorators.are_sensitive_events_enabled", return_value=True)
-@patch(
-    "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._send_chat_request",
-    return_value=TEST_CHAT_RESPONSE,
-)
-@patch("opentelemetry.trace.INVALID_SPAN")
-async def test_trace_chat_completion(
-    mock_span,
-    mock_send_chat_request,
-    mock_sensitive_events_enabled,
-    mock_model_diagnostics_enabled,
-    openai_unit_test_env,
-):
-    chat_completion = OpenAIChatCompletion(ai_model_id=TEST_MODEL, env_file_path="test.env")
-    extension_data = {"max_tokens": TEST_MAX_TOKENS, "temperature": TEST_TEMPERATURE, "top_p": TEST_TOP_P}
-
-    results: list[ChatMessageContent] = await chat_completion.get_chat_message_contents(
-        chat_history=ChatHistory(), settings=PromptExecutionSettings(extension_data=extension_data)
-    )
-
-    assert results == TEST_CHAT_RESPONSE
-
-    mock_span.set_attributes.assert_called_with(
-        {
-            OPERATION: CHAT_COMPLETION_OPERATION,
-            SYSTEM: OpenAIChatCompletionBase.MODEL_PROVIDER_NAME,
-            MODEL: TEST_MODEL,
-        }
-    )
-    mock_span.set_attribute.assert_any_call(MAX_TOKENS, TEST_MAX_TOKENS)
-    mock_span.set_attribute.assert_any_call(TEMPERATURE, TEST_TEMPERATURE)
-    mock_span.set_attribute.assert_any_call(TOP_P, TEST_TOP_P)
-    mock_span.add_event.assert_any_call(PROMPT_EVENT, {PROMPT_EVENT_PROMPT: "[]"})
-
-    mock_span.set_attribute.assert_any_call(RESPONSE_ID, TEST_RESPONSE_ID)
-    mock_span.set_attribute.assert_any_call(FINISH_REASON, str(FinishReason.STOP))
-    mock_span.add_event.assert_any_call(
-        COMPLETION_EVENT, {COMPLETION_EVENT_COMPLETION: EXPECTED_CHAT_COMPLETION_EVENT_PAYLOAD}
-    )
-
-
-@pytest.mark.asyncio
-@patch("semantic_kernel.utils.telemetry.decorators.are_model_diagnostics_enabled", return_value=True)
-@patch("semantic_kernel.utils.telemetry.decorators.are_sensitive_events_enabled", return_value=True)
-@patch(
-    "semantic_kernel.connectors.ai.open_ai.services.open_ai_text_completion_base.OpenAITextCompletionBase._send_request",
-    return_value=TEST_TEXT_RESPONSE,
-)
-@patch("opentelemetry.trace.INVALID_SPAN")
-async def test_trace_text_completion(
-    mock_span, mock_send_request, mock_sensitive_events_enabled, mock_model_diagnostics_enabled, openai_unit_test_env
-):
-    chat_completion = OpenAITextCompletion(ai_model_id=TEST_MODEL, env_file_path="test.env")
-    extension_data = {"max_tokens": TEST_MAX_TOKENS, "temperature": TEST_TEMPERATURE, "top_p": TEST_TOP_P}
-
-    results: list[TextContent] = await chat_completion.get_text_contents(
-        prompt=TEST_TEXT_PROMPT, settings=PromptExecutionSettings(extension_data=extension_data)
-    )
-
-    assert results == EXPECTED_TEXT_CONTENT
-
-    mock_span.set_attributes.assert_called_with(
-        {
-            OPERATION: TEXT_COMPLETION_OPERATION,
-            SYSTEM: OpenAIChatCompletionBase.MODEL_PROVIDER_NAME,
-            MODEL: TEST_MODEL,
-        }
-    )
-    mock_span.set_attribute.assert_any_call(MAX_TOKENS, TEST_MAX_TOKENS)
-    mock_span.set_attribute.assert_any_call(TEMPERATURE, TEST_TEMPERATURE)
-    mock_span.set_attribute.assert_any_call(TOP_P, TEST_TOP_P)
-    mock_span.add_event.assert_any_call(PROMPT_EVENT, {PROMPT_EVENT_PROMPT: TEST_TEXT_PROMPT})
-
-    mock_span.set_attribute.assert_any_call(RESPONSE_ID, TEST_RESPONSE_ID)
-    mock_span.add_event.assert_any_call(
-        COMPLETION_EVENT, {COMPLETION_EVENT_COMPLETION: EXPECTED_TEXT_COMPLETION_EVENT_PAYLOAD}
-    )
-
-
-@pytest.mark.asyncio
-@patch("semantic_kernel.utils.telemetry.decorators.are_model_diagnostics_enabled", return_value=True)
-@patch("semantic_kernel.utils.telemetry.decorators.are_sensitive_events_enabled", return_value=True)
-@patch(
-    "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._send_chat_request",
-    side_effect=ServiceResponseException,
-)
-@patch("opentelemetry.trace.INVALID_SPAN")
-async def test_trace_chat_completion_exception(
-    mock_span,
-    mock_send_chat_request,
-    mock_sensitive_events_enabled,
-    mock_model_diagnostics_enabled,
-    openai_unit_test_env,
-):
-    chat_completion = OpenAIChatCompletion(ai_model_id=TEST_MODEL, env_file_path="test.env")
-    extension_data = {"max_tokens": TEST_MAX_TOKENS, "temperature": TEST_TEMPERATURE, "top_p": TEST_TOP_P}
-
-    with pytest.raises(ServiceResponseException):
-        await chat_completion.get_chat_message_contents(
-            chat_history=ChatHistory(), settings=PromptExecutionSettings(extension_data=extension_data)
-        )
-
-    mock_span.set_attributes.assert_called_with(
-        {
-            OPERATION: CHAT_COMPLETION_OPERATION,
-            SYSTEM: OpenAIChatCompletionBase.MODEL_PROVIDER_NAME,
-            MODEL: TEST_MODEL,
-        }
-    )
-
-    exception = ServiceResponseException()
-    mock_span.set_attribute.assert_any_call(ERROR_TYPE, str(type(exception)))
-    mock_span.set_status.assert_any_call(StatusCode.ERROR, repr(exception))
-
-    mock_span.end.assert_any_call()
-
-
-@pytest.mark.asyncio
-@patch("semantic_kernel.utils.telemetry.decorators.are_model_diagnostics_enabled", return_value=True)
-@patch("semantic_kernel.utils.telemetry.decorators.are_sensitive_events_enabled", return_value=True)
-@patch(
-    "semantic_kernel.connectors.ai.open_ai.services.open_ai_text_completion_base.OpenAITextCompletionBase._send_request",
-    side_effect=ServiceResponseException,
-)
-@patch("opentelemetry.trace.INVALID_SPAN")
-async def test_trace_text_completion_exception(
-    mock_span,
-    mock_send_chat_request,
-    mock_sensitive_events_enabled,
-    mock_model_diagnostics_enabled,
-    openai_unit_test_env,
-):
-    chat_completion = OpenAITextCompletion(ai_model_id=TEST_MODEL, env_file_path="test.env")
-    extension_data = {"max_tokens": TEST_MAX_TOKENS, "temperature": TEST_TEMPERATURE, "top_p": TEST_TOP_P}
-
-    with pytest.raises(ServiceResponseException):
-        await chat_completion.get_text_contents(
-            prompt=TEST_TEXT_PROMPT, settings=PromptExecutionSettings(extension_data=extension_data)
-        )
-
-    mock_span.set_attributes.assert_called_with(
-        {
-            OPERATION: TEXT_COMPLETION_OPERATION,
-            SYSTEM: OpenAIChatCompletionBase.MODEL_PROVIDER_NAME,
-            MODEL: TEST_MODEL,
-        }
-    )
-
-    exception = ServiceResponseException()
-    mock_span.set_attribute.assert_any_call(ERROR_TYPE, str(type(exception)))
-    mock_span.set_status.assert_any_call(StatusCode.ERROR, repr(exception))
-
-    mock_span.end.assert_any_call()
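
A minimal sketch (illustrative only, not part of the patch; it assumes just the module paths introduced in this change) of reusing the settings-reload pattern from the new conftest fixture outside of pytest, for example when toggling the diagnostics at runtime:

    import os

    import semantic_kernel.utils.telemetry.model_diagnostics.decorators as decorators
    from semantic_kernel.utils.telemetry.model_diagnostics.model_diagnostics_settings import ModelDiagnosticSettings

    # The diagnostics settings are read once at import time, so they must be
    # re-created after the environment variables change.
    os.environ["SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS"] = "true"
    os.environ["SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE"] = "true"
    decorators.MODEL_DIAGNOSTICS_SETTINGS = ModelDiagnosticSettings.create()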