Skip to content

Commit

Permalink
feat(ai-monitoring): Calculate an AI model cost metric per-span (#3554)
Browse files Browse the repository at this point in the history
AI models have different prices, for example OpenAI's implementation of
GPT-4 costs $30 per million tokens of input, and $60 per million tokens
of output.

We'd like to be able to make graphs of how much money you've spent on
your AI projects, grouped by, e.g., AI pipeline name.

It's messy to put this logic in SDKs, as we can't update those once
launched. If we put the logic in Sentry, then any price changes would
retroactively apply.

---------

Co-authored-by: Joris Bayer <joris.bayer@sentry.io>
  • Loading branch information
colin-sentry and jjbayer committed May 14, 2024
1 parent 1ddc9a2 commit eb94d40
Show file tree
Hide file tree
Showing 19 changed files with 373 additions and 157 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
- Add AI model costs to global config. ([#3579](https://github.com/getsentry/relay/pull/3579))
- Add support for `event.` in the `Span` `Getter` implementation. ([#3577](https://github.com/getsentry/relay/pull/3577))
- Ensure `chunk_id` and `profiler_id` are UUIDs and sort samples. ([#3588](https://github.com/getsentry/relay/pull/3588))
- Add a calculated measurement based on the AI model and the tokens used. ([#3554](https://github.com/getsentry/relay/pull/3554))


## 24.4.2

Expand Down
3 changes: 2 additions & 1 deletion relay-cabi/src/processing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,8 @@ pub unsafe extern "C" fn relay_store_normalizer_normalize_event(
max_tag_value_length: usize::MAX,
span_description_rules: None,
performance_score: None,
geoip_lookup: None, // only supported in relay
geoip_lookup: None, // only supported in relay
ai_model_costs: None, // only supported in relay
enable_trimming: config.enable_trimming.unwrap_or_default(),
measurements: None,
normalize_spans: config.normalize_spans,
Expand Down
73 changes: 0 additions & 73 deletions relay-dynamic-config/src/ai.rs

This file was deleted.

35 changes: 35 additions & 0 deletions relay-dynamic-config/src/defaults.rs
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,41 @@ pub fn hardcoded_span_metrics() -> Vec<(String, Vec<MetricSpec>)> {
.always(), // already guarded by condition on metric
],
},
// Counter metric that aggregates the per-span AI cost (in USD) computed
// during normalization from the model's token counts and configured prices.
MetricSpec {
    category: DataCategory::Span,
    mri: "c:spans/ai.total_cost@usd".into(),
    field: Some("span.measurements.ai_total_cost.value".into()),
    // Only extract for AI spans; tags below rely on this condition.
    condition: Some(is_ai.clone()),
    tags: vec![
        Tag::with_key("span.op")
            .from_field("span.sentry_tags.op")
            .always(),
        Tag::with_key("environment")
            .from_field("span.sentry_tags.environment")
            .always(),
        Tag::with_key("release")
            .from_field("span.sentry_tags.release")
            .always(),
        Tag::with_key("span.origin")
            .from_field("span.origin")
            .always(),
        Tag::with_key("span.category")
            .from_field("span.sentry_tags.category")
            .always(), // already guarded by condition on metric
        Tag::with_key("span.ai.pipeline.group")
            .from_field("span.sentry_tags.ai_pipeline_group")
            .always(), // already guarded by condition on metric
        Tag::with_key("span.description")
            .from_field("span.sentry_tags.description")
            .always(), // already guarded by condition on metric
        Tag::with_key("span.group")
            .from_field("span.sentry_tags.group")
            .always(), // already guarded by condition on metric
        // NOTE(review): removed a duplicate `span.op` tag entry that was
        // declared twice in this spec; the first declaration above suffices.
    ],
},
MetricSpec {
category: DataCategory::Span,
mri: "d:spans/webvital.score.total@ratio".into(),
Expand Down
9 changes: 4 additions & 5 deletions relay-dynamic-config/src/global.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,13 @@ use std::io::BufReader;
use std::path::Path;

use relay_base_schema::metrics::MetricNamespace;
use relay_event_normalization::MeasurementsConfig;
use relay_event_normalization::{MeasurementsConfig, ModelCosts};
use relay_filter::GenericFiltersConfig;
use relay_quotas::Quota;
use serde::{de, Deserialize, Serialize};
use serde_json::Value;

use crate::ai::ModelCosts;
use crate::{ai, defaults, ErrorBoundary, MetricExtractionGroup, MetricExtractionGroups};
use crate::{defaults, ErrorBoundary, MetricExtractionGroup, MetricExtractionGroups};

/// A dynamic configuration for all Relays passed down from Sentry.
///
Expand Down Expand Up @@ -50,7 +49,7 @@ pub struct GlobalConfig {

/// Configuration for AI span measurements.
#[serde(skip_serializing_if = "is_missing")]
pub ai_model_costs: ErrorBoundary<ai::ModelCosts>,
pub ai_model_costs: ErrorBoundary<ModelCosts>,
}

impl GlobalConfig {
Expand Down Expand Up @@ -406,7 +405,7 @@ fn is_ok_and_empty(value: &ErrorBoundary<MetricExtractionGroups>) -> bool {
)
}

fn is_missing(value: &ErrorBoundary<ai::ModelCosts>) -> bool {
fn is_missing(value: &ErrorBoundary<ModelCosts>) -> bool {
matches!(
value,
&ErrorBoundary::Ok(ModelCosts{ version, ref costs }) if version == 0 && costs.is_empty()
Expand Down
1 change: 0 additions & 1 deletion relay-dynamic-config/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@
)]
#![allow(clippy::derive_partial_eq_without_eq)]

mod ai;
mod defaults;
mod error_boundary;
mod feature;
Expand Down
117 changes: 115 additions & 2 deletions relay-event-normalization/src/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@ use smallvec::SmallVec;
use uuid::Uuid;

use crate::normalize::request;
use crate::span::ai::normalize_ai_measurements;
use crate::span::tag_extraction::extract_span_tags_from_event;
use crate::utils::{self, get_event_user_tag, MAX_DURATION_MOBILE_MS};
use crate::{
breakdowns, event_error, legacy, mechanism, remove_other, schema, span, stacktrace,
transactions, trimming, user_agent, BreakdownsConfig, CombinedMeasurementsConfig, GeoIpLookup,
MaxChars, PerformanceScoreConfig, RawUserAgentInfo, SpanDescriptionRule, TransactionNameConfig,
MaxChars, ModelCosts, PerformanceScoreConfig, RawUserAgentInfo, SpanDescriptionRule,
TransactionNameConfig,
};

/// Configuration for [`normalize_event`].
Expand Down Expand Up @@ -132,6 +134,9 @@ pub struct NormalizationConfig<'a> {
/// Configuration for generating performance score measurements for web vitals
pub performance_score: Option<&'a PerformanceScoreConfig>,

/// Configuration for calculating the cost of AI model runs
pub ai_model_costs: Option<&'a ModelCosts>,

/// An initialized GeoIP lookup.
pub geoip_lookup: Option<&'a GeoIpLookup>,

Expand Down Expand Up @@ -175,6 +180,7 @@ impl<'a> Default for NormalizationConfig<'a> {
span_description_rules: Default::default(),
performance_score: Default::default(),
geoip_lookup: Default::default(),
ai_model_costs: Default::default(),
enable_trimming: false,
measurements: None,
normalize_spans: true,
Expand Down Expand Up @@ -292,6 +298,7 @@ fn normalize(event: &mut Event, meta: &mut Meta, config: &NormalizationConfig) {
config.max_name_and_unit_len,
); // Measurements are part of the metric extraction
normalize_performance_score(event, config.performance_score);
normalize_ai_measurements(event, config.ai_model_costs);
normalize_breakdowns(event, config.breakdowns_config); // Breakdowns are part of the metric extraction too
normalize_default_attributes(event, meta, config);

Expand Down Expand Up @@ -1411,14 +1418,15 @@ mod tests {

use insta::{assert_debug_snapshot, assert_json_snapshot};
use itertools::Itertools;
use relay_common::glob2::LazyGlob;
use relay_event_schema::protocol::{
Breadcrumb, Csp, DebugMeta, DeviceContext, MetricSummary, MetricsSummary, Span, Values,
};
use relay_protocol::{get_value, SerializableAnnotated};
use serde_json::json;

use super::*;
use crate::{ClientHints, MeasurementsConfig};
use crate::{ClientHints, MeasurementsConfig, ModelCost};

const IOS_MOBILE_EVENT: &str = r#"
{
Expand Down Expand Up @@ -2087,6 +2095,111 @@ mod tests {
"###);
}

#[test]
fn test_ai_measurements() {
    // Two AI spans: the first reports only a total token count, the second
    // reports separate prompt/completion token counts.
    let json = r#"
        {
            "spans": [
                {
                    "timestamp": 1702474613.0495,
                    "start_timestamp": 1702474613.0175,
                    "description": "OpenAI ",
                    "op": "ai.chat_completions.openai",
                    "span_id": "9c01bd820a083e63",
                    "parent_span_id": "a1e13f3f06239d69",
                    "trace_id": "922dda2462ea4ac2b6a4b339bee90863",
                    "measurements": {
                        "ai_total_tokens_used": {
                            "value": 1230
                        }
                    },
                    "data": {
                        "ai.pipeline.name": "Autofix Pipeline",
                        "ai.model_id": "claude-2.1"
                    }
                },
                {
                    "timestamp": 1702474613.0495,
                    "start_timestamp": 1702474613.0175,
                    "description": "OpenAI ",
                    "op": "ai.chat_completions.openai",
                    "span_id": "ac01bd820a083e63",
                    "parent_span_id": "a1e13f3f06239d69",
                    "trace_id": "922dda2462ea4ac2b6a4b339bee90863",
                    "measurements": {
                        "ai_prompt_tokens_used": {
                            "value": 1000
                        },
                        "ai_completion_tokens_used": {
                            "value": 2000
                        }
                    },
                    "data": {
                        "ai.pipeline.name": "Autofix Pipeline",
                        "ai.model_id": "gpt4-21-04"
                    }
                }
            ]
        }
    "#;

    let mut annotated_event = Annotated::<Event>::from_json(json).unwrap();

    // Prices: claude-2* at $1/1k tokens (flat), gpt4-21* at $2/1k prompt
    // tokens and $20/1k completion tokens.
    let costs = ModelCosts {
        version: 1,
        costs: vec![
            ModelCost {
                model_id: LazyGlob::new("claude-2*".into()),
                for_completion: false,
                cost_per_1k_tokens: 1.0,
            },
            ModelCost {
                model_id: LazyGlob::new("gpt4-21*".into()),
                for_completion: false,
                cost_per_1k_tokens: 2.0,
            },
            ModelCost {
                model_id: LazyGlob::new("gpt4-21*".into()),
                for_completion: true,
                cost_per_1k_tokens: 20.0,
            },
        ],
    };

    normalize_event(
        &mut annotated_event,
        &NormalizationConfig {
            ai_model_costs: Some(&costs),
            ..NormalizationConfig::default()
        },
    );

    let spans = annotated_event.value().unwrap().spans.value().unwrap();
    assert_eq!(spans.len(), 2);

    // Helper: read the computed `ai_total_cost` measurement of span `index`.
    let total_cost = |index: usize| {
        spans
            .get(index)
            .unwrap()
            .value()
            .unwrap()
            .measurements
            .value()
            .unwrap()
            .get_value("ai_total_cost")
    };

    // 1230 total tokens * $1 / 1k tokens.
    assert_eq!(total_cost(0), Some(1.23));
    // 2000 completion tokens * $20/1k + 1000 prompt tokens * $2/1k.
    assert_eq!(total_cost(1), Some(20.0 * 2.0 + 2.0));
}

#[test]
fn test_apple_high_device_class() {
let mut event = Event {
Expand Down
Loading

0 comments on commit eb94d40

Please sign in to comment.