Python: #6761 Onnx Connector #8106

Open

wants to merge 29 commits into main from issue-6761-ONNX-gen-ai-Connector

Commits (29)
ff979ba
setup for onnx connector
nmoeller Aug 14, 2024
49a2a72
initial implementation commit
nmoeller Aug 16, 2024
b2c5a70
Merge branch 'main' into issue-6761-ONNX-gen-ai-Connector
nmoeller Aug 16, 2024
342db1d
initial unit tests for onnx text completion
nmoeller Aug 19, 2024
9c371de
Merge branch 'issue-6761-ONNX-gen-ai-Connector' of https://github.com…
nmoeller Aug 19, 2024
adf262b
added chat completion support
nmoeller Aug 21, 2024
a40a6cb
added small comment regarding Image Opening
nmoeller Aug 22, 2024
fd6d9b4
Merge remote-tracking branch 'origin/main' into issue-6761-ONNX-gen-a…
nmoeller Sep 2, 2024
b118396
migrated to uv
nmoeller Sep 2, 2024
0da7615
Merge remote-tracking branch 'origin/main' into issue-6761-ONNX-gen-a…
nmoeller Sep 12, 2024
0b6df05
Merge remote-tracking branch 'origin/main' into issue-6761-ONNX-gen-a…
nmoeller Sep 12, 2024
3c41141
added unit tests and integration tests
nmoeller Sep 17, 2024
81bf663
added unit tests and integration tests
nmoeller Sep 18, 2024
bd21157
Merge remote-tracking branch 'origin/main' into issue-6761-ONNX-gen-a…
nmoeller Sep 18, 2024
21f585f
Update python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai…
nmoeller Sep 19, 2024
58702f3
Update python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai…
nmoeller Sep 19, 2024
8908cb9
Update python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai…
nmoeller Sep 19, 2024
af15de6
Update python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai…
nmoeller Sep 19, 2024
14128e2
integrated pr feedback
nmoeller Sep 19, 2024
352dede
Merge branch 'issue-6761-ONNX-gen-ai-Connector' of https://github.com…
nmoeller Sep 19, 2024
9471cbb
integrated pr feedback
nmoeller Sep 19, 2024
49514d6
Merge remote-tracking branch 'origin/main' into issue-6761-ONNX-gen-a…
nmoeller Sep 20, 2024
322afd8
integrated new template architecture
nmoeller Sep 20, 2024
7c31645
adjusted default max length for int test with image
nmoeller Sep 20, 2024
477db50
fixed documentation in samples
nmoeller Sep 20, 2024
030254a
fixed docstring for chat completion
nmoeller Sep 20, 2024
627f0b7
Merge branch 'main' into issue-6761-ONNX-gen-ai-Connector
nmoeller Sep 20, 2024
e86db81
Update python/semantic_kernel/connectors/ai/onnx/onnx_utils.py
nmoeller Sep 21, 2024
cdb18ba
implemented pr feedback
nmoeller Sep 21, 2024
3 changes: 3 additions & 0 deletions python/pyproject.toml
@@ -86,6 +86,9 @@ mistralai = [
ollama = [
"ollama ~= 0.2"
]
onnx = [
"onnxruntime-genai ~= 0.4"
]
anthropic = [
"anthropic ~= 0.32"
]
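The new extra maps the connector onto the onnxruntime-genai runtime. A minimal sketch (not part of the diff) of guarding that optional dependency in user code; the error message is illustrative:

try:
    import onnxruntime_genai  # installed via the "onnx" extra, e.g. pip install semantic-kernel[onnx]
except ImportError as exc:
    raise ImportError("onnxruntime-genai is required for the ONNX connector; install the 'onnx' extra.") from exc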
75 changes: 75 additions & 0 deletions python/samples/concepts/local_models/onnx_chat_completion.py
@@ -0,0 +1,75 @@
# Copyright (c) Microsoft. All rights reserved.


import asyncio

from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.kernel import Kernel

# This concept sample shows how to use the Onnx connector
# with a local model running via ONNX Runtime GenAI.

kernel = Kernel()

service_id = "phi3"
#############################################
# Make sure to download an ONNX model
# (https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-onnx)
# If onnxruntime-genai is used:
#     use the model stored in the /cpu folder
# If onnxruntime-genai-cuda is installed for GPU use:
#     use the model stored in the /cuda folder
# Then set the ONNX_GEN_AI_FOLDER environment variable to the path of the model folder
#############################################
streaming = True

chat_completion = OnnxGenAIChatCompletion(ai_model_id=service_id, template="phi3")
settings = OnnxGenAIPromptExecutionSettings()

system_message = """You are a helpful assistant."""
chat_history = ChatHistory(system_message=system_message)


async def chat() -> bool:
try:
user_input = input("User:> ")
except KeyboardInterrupt:
print("\n\nExiting chat...")
return False
except EOFError:
print("\n\nExiting chat...")
return False

if user_input == "exit":
print("\n\nExiting chat...")
return False
chat_history.add_user_message(user_input)
if streaming:
print("Mosscap:> ", end="")
message = ""
async for chunk in chat_completion.get_streaming_chat_message_content(
chat_history=chat_history, settings=settings, kernel=kernel
):
if chunk:
print(str(chunk), end="")
message += str(chunk)
print("\n")
chat_history.add_assistant_message(message)
else:
answer = await chat_completion.get_chat_message_content(
chat_history=chat_history, settings=settings, kernel=kernel
)
print(f"Mosscap:> {answer}")
chat_history.add_assistant_message(answer)
return True


async def main() -> None:
chatting = True
while chatting:
chatting = await chat()


if __name__ == "__main__":
asyncio.run(main())
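A minimal sketch (not part of the sample) of pointing the connector at a downloaded model programmatically instead of exporting ONNX_GEN_AI_FOLDER in the shell; the model path is a placeholder and the constructor arguments mirror those used above:

import os

from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings

# Placeholder path: use the /cpu folder for onnxruntime-genai or /cuda for onnxruntime-genai-cuda.
os.environ["ONNX_GEN_AI_FOLDER"] = "/models/Phi-3-mini-4k-instruct-onnx/cpu"

chat_completion = OnnxGenAIChatCompletion(ai_model_id="phi3", template="phi3")
settings = OnnxGenAIPromptExecutionSettings()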
@@ -0,0 +1,92 @@
# Copyright (c) Microsoft. All rights reserved.


import asyncio

from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings
from semantic_kernel.contents import AuthorRole, ChatMessageContent, ImageContent
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.kernel import Kernel

# This concept sample shows how to use the Onnx connector
# with a local model running via ONNX Runtime GenAI.

kernel = Kernel()

service_id = "phi3"
#############################################
# Make sure to download an ONNX model
# If onnxruntime-genai is used:
#     (https://huggingface.co/microsoft/Phi-3-vision-128k-instruct-onnx-cpu)
# If onnxruntime-genai-cuda is installed for GPU use:
#     (https://huggingface.co/microsoft/Phi-3-vision-128k-instruct-onnx-gpu)
# Then set the ONNX_GEN_AI_FOLDER environment variable to the path of the model folder
#############################################
streaming = False

chat_completion = OnnxGenAIChatCompletion(ai_model_id=service_id, template="phi3v")

# The max_length property is important for allocating RAM:
# if the value is too big, you may run out of memory;
# if the value is too small, your input is limited.
settings = OnnxGenAIPromptExecutionSettings(max_length=3072)

system_message = """
You are a helpful assistant.
You know about provided images and the history of the conversation.
"""
chat_history = ChatHistory(system_message=system_message)


async def chat() -> bool:
try:
user_input = input("User:> ")
except KeyboardInterrupt:
print("\n\nExiting chat...")
return False
except EOFError:
print("\n\nExiting chat...")
return False

if user_input == "exit":
print("\n\nExiting chat...")
return False
chat_history.add_user_message(user_input)
if streaming:
print("Mosscap:> ", end="")
message = ""
async for chunk in chat_completion.get_streaming_chat_message_content(
chat_history=chat_history, settings=settings, kernel=kernel
):
            if chunk.content:
                print(chunk.content, end="")
                message += chunk.content
        print("\n")
        chat_history.add_assistant_message(message)
else:
answer = await chat_completion.get_chat_message_content(
chat_history=chat_history, settings=settings, kernel=kernel
)
print(f"Mosscap:> {answer}")
chat_history.add_message(answer)
return True


async def main() -> None:
chatting = True
image_path = input("Image Path (leave empty if no image): ")
if image_path:
chat_history.add_message(
ChatMessageContent(
role=AuthorRole.USER,
items=[
ImageContent.from_image_path(image_path=image_path),
],
),
)
while chatting:
chatting = await chat()


if __name__ == "__main__":
asyncio.run(main())
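The sample attaches the image once at startup and then adds plain-text turns. A minimal sketch (not part of the sample) of a single user turn that carries both text and image; TextContent is assumed to come from the same semantic_kernel.contents package, and the question and path are placeholders:

from semantic_kernel.contents import AuthorRole, ChatMessageContent, ImageContent, TextContent

# One user message combining the question and the image it refers to.
chat_history.add_message(
    ChatMessageContent(
        role=AuthorRole.USER,
        items=[
            TextContent(text="What is shown in this picture?"),  # placeholder question
            ImageContent.from_image_path(image_path="sample.jpg"),  # placeholder path
        ],
    )
)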
72 changes: 72 additions & 0 deletions python/samples/concepts/local_models/onnx_text_completion.py
@@ -0,0 +1,72 @@
# Copyright (c) Microsoft. All rights reserved.


import asyncio

from semantic_kernel.connectors.ai.onnx import OnnxGenAITextCompletion
from semantic_kernel.functions.kernel_arguments import KernelArguments
from semantic_kernel.kernel import Kernel

# This concept sample shows how to use the Onnx connector
# with a local model running via ONNX Runtime GenAI.

kernel = Kernel()

service_id = "phi3"
#############################################
# Make sure to download an ONNX model
# (https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-onnx)
# If onnxruntime-genai is used:
#     use the model stored in the /cpu folder
# If onnxruntime-genai-cuda is installed for GPU use:
#     use the model stored in the /cuda folder
# Then set the ONNX_GEN_AI_FOLDER environment variable to the path of the model folder
#############################################
streaming = True

kernel.add_service(OnnxGenAITextCompletion(ai_model_id=service_id))

settings = kernel.get_prompt_execution_settings_from_service_id(service_id)

chat_function = kernel.add_function(
plugin_name="ChatBot",
function_name="Chat",
prompt="<|user|>{{$user_input}}<|end|><|assistant|>",
template_format="semantic-kernel",
prompt_execution_settings=settings,
)


async def chat() -> bool:
try:
user_input = input("User:> ")
except KeyboardInterrupt:
print("\n\nExiting chat...")
return False
except EOFError:
print("\n\nExiting chat...")
return False

if user_input == "exit":
print("\n\nExiting chat...")
return False

if streaming:
print("Mosscap:> ", end="")
async for chunk in kernel.invoke_stream(chat_function, KernelArguments(user_input=user_input)):
print(chunk[0].text, end="")
print("\n")
else:
answer = await kernel.invoke(chat_function, KernelArguments(user_input=user_input))
print(f"Mosscap:> {answer}")
return True


async def main() -> None:
chatting = True
while chatting:
chatting = await chat()


if __name__ == "__main__":
asyncio.run(main())
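A minimal sketch (not part of the sample) of a one-shot, non-interactive call using the kernel and chat_function defined above; the question is a placeholder:

async def ask_once(question: str) -> None:
    # Single invocation of the prompt-based ChatBot function registered above.
    result = await kernel.invoke(chat_function, KernelArguments(user_input=question))
    print(f"Mosscap:> {result}")

# Example: asyncio.run(ask_once("Why is the sky blue?"))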
@@ -68,6 +68,9 @@ def __init__(
except ValidationError as ex:
raise ServiceInitializationError("Failed to create Ollama settings.", ex) from ex

if not ollama_settings.model:
            raise ServiceInitializationError("Provide ai_model_id or set the OLLAMA_MODEL environment variable.")

super().__init__(
service_id=service_id or ollama_settings.model,
ai_model_id=ollama_settings.model,
9 changes: 9 additions & 0 deletions python/semantic_kernel/connectors/ai/onnx/__init__.py
@@ -0,0 +1,9 @@
# Copyright (c) Microsoft. All rights reserved.

from semantic_kernel.connectors.ai.onnx.onnx_gen_ai_prompt_execution_settings import (
OnnxGenAIPromptExecutionSettings,
)
from semantic_kernel.connectors.ai.onnx.services.onnx_gen_ai_chat_completion import OnnxGenAIChatCompletion
from semantic_kernel.connectors.ai.onnx.services.onnx_gen_ai_text_completion import OnnxGenAITextCompletion

__all__ = ["OnnxGenAIChatCompletion", "OnnxGenAIPromptExecutionSettings", "OnnxGenAITextCompletion"]
python/semantic_kernel/connectors/ai/onnx/onnx_gen_ai_prompt_execution_settings.py
@@ -0,0 +1,25 @@
# Copyright (c) Microsoft. All rights reserved.


from pydantic import Field

from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings


class OnnxGenAIPromptExecutionSettings(PromptExecutionSettings):
"""OnnxGenAI prompt execution settings."""

diversity_penalty: float | None = Field(None, ge=0.0, le=1.0)
do_sample: bool = False
early_stopping: bool = True
length_penalty: float | None = Field(None, ge=0.0, le=1.0)
max_length: int = Field(3072, gt=0)
min_length: int | None = Field(None, gt=0)
no_repeat_ngram_size: int = 0
num_beams: int | None = Field(None, gt=0)
num_return_sequences: int | None = Field(None, gt=0)
    past_present_share_buffer: bool = True
repetition_penalty: float | None = Field(None, ge=0.0, le=1.0)
temperature: float | None = Field(None, ge=0.0, le=2.0)
top_k: int | None = Field(None, gt=0)
top_p: float | None = Field(None, ge=0.0, le=1.0)
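These settings default to greedy decoding (do_sample=False). A minimal sketch of enabling sampling; the field names come from the class above and the values are illustrative:

from semantic_kernel.connectors.ai.onnx import OnnxGenAIPromptExecutionSettings

# Sampling-based generation instead of the greedy default; values are illustrative.
settings = OnnxGenAIPromptExecutionSettings(
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    max_length=2048,
)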
@@ -0,0 +1,22 @@
# Copyright (c) Microsoft. All rights reserved.

from typing import ClassVar

from semantic_kernel.kernel_pydantic import KernelBaseSettings


class OnnxGenAISettings(KernelBaseSettings):
"""Onnx Gen AI model settings.

The settings are first loaded from environment variables with the prefix 'ONNX_GEN_AI_'. If the
environment variables are not found, the settings can be loaded from a .env file with the
    encoding 'utf-8'. If the settings are not found in either location, validation will fail,
    alerting you that the required settings are missing.

    Settings for the prefix 'ONNX_GEN_AI_' are:
    - folder: Path to the Onnx model folder (ENV: ONNX_GEN_AI_FOLDER). Required.
    - env_file_path: If provided, the .env settings are read from this file path location.
"""

env_prefix: ClassVar[str] = "ONNX_GEN_AI_"
folder: str
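A minimal sketch (not part of the diff) of the environment-based resolution described in the docstring above; the model path is a placeholder, and an equivalent ONNX_GEN_AI_FOLDER entry could instead live in a .env file referenced via env_file_path:

import os

# Picked up through the "ONNX_GEN_AI_" prefix when the settings are created.
os.environ["ONNX_GEN_AI_FOLDER"] = "/models/Phi-3-mini-4k-instruct-onnx/cpu"  # placeholder path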