Skip to content

Commit

Permalink
feat: extend models supported.
Browse files Browse the repository at this point in the history
  • Loading branch information
drudilorenzo committed Apr 27, 2024
1 parent cfb5d68 commit 0bfbad1
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 63 deletions.
17 changes: 6 additions & 11 deletions openai_cost_logger/constants.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,8 @@
from enum import Enum

"""Default value for the cost-logs directory."""
DEFAULT_LOG_PATH = "cost-logs"


"""Enum containing the tested models."""
class Models(Enum):
    """Models tested with the cost logger.

    Each value is the model name as reported by the OpenAI/Azure API
    (i.e. the `response.model` field) and is the key expected by the
    MODELS_COST fallback pricing dictionary.
    """
    TURBO_3_5 = "gpt-3.5-turbo"
    TURBO_3_5_INSTRUCT = "gpt-3.5-turbo-instruct"
    AZURE_3_5_TURBO = "gpt-35-turbo-0125"
    AZURE_4_TURBO = "gpt-4-0125-Preview"
    AZURE_4 = "gpt-4-0613"
    # Added for consistency: MODELS_COST also prices this embedding model,
    # but no enum member was created for it in this commit.
    TEXT_EMBEDDING_ADA_002 = "text-embedding-ada-002"

"""The costs of the models above (per million tokens)."""
"""The costs of the models above (per million tokens). Dictionary used in case the user does not provide the costs."""
MODELS_COST = {
"gpt-3.5-turbo": {
"input": 0.5,
Expand All @@ -33,4 +24,8 @@ class Models(Enum):
"input": 30,
"output": 60
},
"text-embedding-ada-002": {
"input": 0.1,
"output": 0.0
}
}
110 changes: 64 additions & 46 deletions openai_cost_logger/openai_cost_logger.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,21 @@
import csv
import json
import warnings
from typing import Dict
from pathlib import Path
from time import strftime
from openai.types.chat.chat_completion import ChatCompletion
from openai._models import BaseModel # all the api responses extend BaseModel

from openai_cost_logger.constants import DEFAULT_LOG_PATH, MODELS_COST

from openai_cost_logger.constants import DEFAULT_LOG_PATH

"""Every cost is per million tokens."""
COST_UNIT = 1_000_000

"""Header of the cost log file."""
FILE_HEADER = [
"experiment_name",
"model",
"cost"
]

"""OpenAI cost logger."""
class OpenAICostLogger:
def __init__(
self,
model: str,
input_cost: float,
output_cost: float,
experiment_name: str,
cost_upperbound: float = float('inf'),
log_folder: str = DEFAULT_LOG_PATH,
Expand All @@ -32,21 +24,14 @@ def __init__(
"""Initialize the cost logger.
Args:
client (enum.ClientType): The client to use.
model (str): The model to use.
cost_upperbound (float): The upperbound of the cost after which an exception is raised.
input_cost (float): The cost per million tokens for the input.
output_cost (float): The cost per million tokens for the output.
experiment_name (str): The name of the experiment.
cost_upperbound (float): The upperbound of the cost after which an exception is raised.
log_folder (str): The folder where to save the cost logs.
client_args (Dict, optional): The parameters to pass to the client. Defaults to {}.
log_level (str): The level of logging. # TODO: implement logging levels.
"""
self.cost = 0
self.n_responses = 0
self.model = model
self.input_cost = input_cost
self.log_folder = log_folder
self.output_cost = output_cost
self.experiment_name = experiment_name
self.cost_upperbound = cost_upperbound
self.log_level = log_level
Expand All @@ -58,16 +43,28 @@ def __init__(
self.__build_log_file()


def update_cost(self, response: BaseModel, input_cost: float = None, output_cost: float = None) -> None:
    """Extract the cost from the response and update the cost tracker.

    The updated totals are then written to the json log file.

    Be aware that:
        - every cost is expressed per million tokens;
        - if input_cost and output_cost are not provided, the costs are looked
          up in the MODELS_COST default dictionary; if the model is missing
          there as well, an exception is raised.

    Args:
        response (BaseModel): the response object returned by the OpenAI client
            (all API responses extend BaseModel).
        input_cost (float, optional): the cost per million tokens for the input.
            Defaults to None.
        output_cost (float, optional): the cost per million tokens for the output.
            Defaults to None.

    Raises:
        Exception: if a cost is missing and `response.model` is not a key
            of MODELS_COST.
    """
    # Fail fast when the call cannot be priced at all.
    if (input_cost is None or output_cost is None) and response.model not in MODELS_COST:
        raise Exception(f"Model {response.model} not found in the cost dictionary. Please provide the input and output cost.")

    # Fall back to the default pricing table only for the values not provided.
    input_cost = MODELS_COST[response.model]["input"] if input_cost is None else input_cost
    output_cost = MODELS_COST[response.model]["output"] if output_cost is None else output_cost
    self.cost += self.__get_answer_cost(response=response, input_cost=input_cost, output_cost=output_cost)
    self.n_responses += 1
    self.__write_cost_to_json(response=response, input_cost=input_cost, output_cost=output_cost)
    # Raises if the accumulated cost exceeded the configured upperbound.
    self.__validate_cost()


Expand All @@ -80,16 +77,24 @@ def get_current_cost(self) -> float:
return self.cost


def __get_answer_cost(self, response: BaseModel, input_cost: float, output_cost: float) -> float:
    """Calculate the cost of the response based on the input and output tokens.

    Args:
        response (BaseModel): The response from the model.
        input_cost (float): The cost per million tokens for the input.
        output_cost (float): The cost per million tokens for the output.

    Returns:
        float: The cost of the answer.

    Raises:
        RuntimeWarning: If the output cost is 0 and there are completion tokens.
    """
    # Completion tokens are derived (total - prompt) rather than read from
    # usage.completion_tokens, so responses that lack that field
    # (e.g. embeddings) are handled too.
    completion_tokens = response.usage.total_tokens - response.usage.prompt_tokens

    # A zero output cost combined with non-zero completion tokens usually
    # means the pricing was mis-configured — warn instead of silently
    # under-counting.
    if completion_tokens != 0 and output_cost == 0:
        warnings.warn(f"Output cost: {output_cost}. Found {completion_tokens} completion tokens.", RuntimeWarning)

    # Costs are expressed per million tokens, hence the COST_UNIT division.
    return (input_cost * response.usage.prompt_tokens) / COST_UNIT + (output_cost * completion_tokens) / COST_UNIT


def __validate_cost(self):
Expand All @@ -98,21 +103,27 @@ def __validate_cost(self):
Raises:
Exception: If the cost exceeds the upperbound.
"""
if self.cost > self.cost_upperbound:
if self.cost > self.cost_upperbound:
raise Exception(f"Cost exceeded upperbound: {self.cost} > {self.cost_upperbound}")


def __write_cost_to_json(self, response: BaseModel, input_cost: float, output_cost: float) -> None:
    """Write the cost to the json file.

    The whole file is re-read and re-written on every update so the log on
    disk always matches the in-memory totals.

    Args:
        response (BaseModel): The response from the model.
        input_cost (float): The cost per million tokens for the input.
        output_cost (float): The cost per million tokens for the output.
    """
    with open(self.filepath, 'r') as file:
        data = json.load(file)
    data["total_cost"] = self.cost
    data["total_responses"] = self.n_responses
    data["breakdown"].append(self.__build_log_breadown_entry(
        response=response,
        input_cost=input_cost,
        output_cost=output_cost
    ))
    with open(self.filepath, 'w') as file:
        json.dump(data, file, indent=4)

Expand All @@ -127,7 +138,6 @@ def __build_log_file(self) -> None:
log_file_template = {
"experiment_name": self.experiment_name,
"creation_datetime": strftime("%Y-%m-%d %H:%M:%S"),
"model": self.model,
"total_cost": self.cost,
"total_responses": 0,
"breakdown": []
Expand All @@ -136,20 +146,28 @@ def __build_log_file(self) -> None:
json.dump(log_file_template, file, indent=4)


# NOTE(review): "breadown" (sic) is kept — renaming would break the caller
# in __write_cost_to_json; fix the typo in a dedicated refactor.
def __build_log_breadown_entry(self, response: BaseModel, input_cost: float, output_cost: float) -> Dict:
    """Build a json log entry for the breakdown of the cost.

    Be aware that:
        - The content of the response is supported only for the completion
          models; other responses get a placeholder string.

    Args:
        response (BaseModel): The response from the model.
        input_cost (float): The cost per million tokens for the input.
        output_cost (float): The cost per million tokens for the output.

    Returns:
        Dict: The json log entry.
    """
    # Derived (total - prompt) so embedding responses, whose usage has no
    # completion_tokens, are handled too.
    output_tokens = response.usage.total_tokens - response.usage.prompt_tokens
    # Only completion-style responses expose "choices".
    content = response.choices[0].message.content if hasattr(response, "choices") else "content-not-supported-for-this-model"
    return {
        "cost": self.__get_answer_cost(response=response, input_cost=input_cost, output_cost=output_cost),
        "input_cost_per_million": input_cost,
        "output_cost_per_million": output_cost,
        "input_tokens": response.usage.prompt_tokens,
        # "inferred" because the API may report a more specific model name
        # than the one the user requested.
        "inferred_model": response.model,
        "output_tokens": output_tokens,
        "content": content,
        "datetime": strftime("%Y-%m-%d %H:%M:%S"),
    }
3 changes: 2 additions & 1 deletion openai_cost_logger/openai_cost_logger_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from pathlib import Path


"""OpenAI cost logger utilities functions."""
class OpenAICostLoggerUtils:

@staticmethod
def get_api_key(path: str) -> str:
def read_api_key(path: str) -> str:
"""Return the key contained in the file.
Args:
Expand Down
10 changes: 5 additions & 5 deletions openai_cost_logger/openai_cost_logger_viz.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import os
import json
from datetime import datetime
from typing import Dict
from pathlib import Path
from datetime import datetime
import matplotlib.pyplot as plt
from collections import defaultdict

from openai_cost_logger.constants import DEFAULT_LOG_PATH


"""Cost logger visualizer."""
class OpenAICostLoggerViz:

Expand Down Expand Up @@ -57,9 +58,8 @@ def get_total_cost_by_model(path: str = DEFAULT_LOG_PATH) -> Dict[str, float]:
if filename.endswith(".json"):
with open(Path(path, filename), mode='r') as file:
data = json.load(file)
if data["model"] not in cost_by_model:
cost_by_model[data["model"]] = 0
cost_by_model[data["model"]] += data["total_cost"]
for entry in data["breakdown"]:
cost_by_model[entry["model"]] += entry["cost"]
return cost_by_model


Expand All @@ -70,7 +70,7 @@ def print_total_cost_by_model(path: str = DEFAULT_LOG_PATH) -> None:
log_folder (str, optional): Cost logs directory. Defaults to DEFAULT_LOG_PATH.
This method reads all the files in the specified directory.
"""
cost_by_model = OpenAICostLoggerViz.get_total_cost_by_model(path)
cost_by_model = OpenAICostLoggerViz.get_total_cost_by_model(path=path)
for model, cost in cost_by_model.items():
print(f"{model}: {round(cost, 6)} (USD)")

Expand Down

0 comments on commit 0bfbad1

Please sign in to comment.