Skip to content

Commit

Permalink
feat(ai-monitoring): Calculate an AI model cost metric per-span (#3554)
Browse files Browse the repository at this point in the history
AI models have different prices, for example OpenAI's implementation of
GPT-4 costs $30 per million tokens of input, and $60 per million tokens
of output.

We'd like to be able to make graphs of how much money you've spent on
your AI projects, grouped by, e.g., AI pipeline name.

It's messy to put this logic in SDKs, as we can't update those once
launched. If we put the logic in Sentry, then any price changes would
retroactively apply.

---------

Co-authored-by: Joris Bayer <joris.bayer@sentry.io>
  • Loading branch information
colin-sentry and jjbayer committed May 14, 2024
1 parent 1ddc9a2 commit eb94d40
Show file tree
Hide file tree
Showing 19 changed files with 373 additions and 157 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
- Add AI model costs to global config. ([#3579](https://github.com/getsentry/relay/pull/3579))
- Add support for `event.` in the `Span` `Getter` implementation. ([#3577](https://github.com/getsentry/relay/pull/3577))
- Ensure `chunk_id` and `profiler_id` are UUIDs and sort samples. ([#3588](https://github.com/getsentry/relay/pull/3588))
- Add a calculated measurement based on the AI model and the tokens used. ([#3554](https://github.com/getsentry/relay/pull/3554))


## 24.4.2

Expand Down
3 changes: 2 additions & 1 deletion relay-cabi/src/processing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,8 @@ pub unsafe extern "C" fn relay_store_normalizer_normalize_event(
max_tag_value_length: usize::MAX,
span_description_rules: None,
performance_score: None,
geoip_lookup: None, // only supported in relay
geoip_lookup: None, // only supported in relay
ai_model_costs: None, // only supported in relay
enable_trimming: config.enable_trimming.unwrap_or_default(),
measurements: None,
normalize_spans: config.normalize_spans,
Expand Down
73 changes: 0 additions & 73 deletions relay-dynamic-config/src/ai.rs

This file was deleted.

35 changes: 35 additions & 0 deletions relay-dynamic-config/src/defaults.rs
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,41 @@ pub fn hardcoded_span_metrics() -> Vec<(String, Vec<MetricSpec>)> {
.always(), // already guarded by condition on metric
],
},
// Counter metric that aggregates the per-span AI cost (in USD) computed
// during normalization from the model's token counts and configured prices.
MetricSpec {
    category: DataCategory::Span,
    mri: "c:spans/ai.total_cost@usd".into(),
    field: Some("span.measurements.ai_total_cost.value".into()),
    // Only extract for AI spans; tags below rely on this condition.
    condition: Some(is_ai.clone()),
    tags: vec![
        Tag::with_key("span.op")
            .from_field("span.sentry_tags.op")
            .always(),
        Tag::with_key("environment")
            .from_field("span.sentry_tags.environment")
            .always(),
        Tag::with_key("release")
            .from_field("span.sentry_tags.release")
            .always(),
        Tag::with_key("span.origin")
            .from_field("span.origin")
            .always(),
        Tag::with_key("span.category")
            .from_field("span.sentry_tags.category")
            .always(), // already guarded by condition on metric
        Tag::with_key("span.ai.pipeline.group")
            .from_field("span.sentry_tags.ai_pipeline_group")
            .always(), // already guarded by condition on metric
        Tag::with_key("span.description")
            .from_field("span.sentry_tags.description")
            .always(), // already guarded by condition on metric
        Tag::with_key("span.group")
            .from_field("span.sentry_tags.group")
            .always(), // already guarded by condition on metric
        // NOTE(review): removed a duplicate `span.op` tag entry that was
        // declared twice in this spec; the first declaration above suffices.
    ],
},
MetricSpec {
category: DataCategory::Span,
mri: "d:spans/webvital.score.total@ratio".into(),
Expand Down
9 changes: 4 additions & 5 deletions relay-dynamic-config/src/global.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,13 @@ use std::io::BufReader;
use std::path::Path;

use relay_base_schema::metrics::MetricNamespace;
use relay_event_normalization::MeasurementsConfig;
use relay_event_normalization::{MeasurementsConfig, ModelCosts};
use relay_filter::GenericFiltersConfig;
use relay_quotas::Quota;
use serde::{de, Deserialize, Serialize};
use serde_json::Value;

use crate::ai::ModelCosts;
use crate::{ai, defaults, ErrorBoundary, MetricExtractionGroup, MetricExtractionGroups};
use crate::{defaults, ErrorBoundary, MetricExtractionGroup, MetricExtractionGroups};

/// A dynamic configuration for all Relays passed down from Sentry.
///
Expand Down Expand Up @@ -50,7 +49,7 @@ pub struct GlobalConfig {

/// Configuration for AI span measurements.
#[serde(skip_serializing_if = "is_missing")]
pub ai_model_costs: ErrorBoundary<ai::ModelCosts>,
pub ai_model_costs: ErrorBoundary<ModelCosts>,
}

impl GlobalConfig {
Expand Down Expand Up @@ -406,7 +405,7 @@ fn is_ok_and_empty(value: &ErrorBoundary<MetricExtractionGroups>) -> bool {
)
}

fn is_missing(value: &ErrorBoundary<ai::ModelCosts>) -> bool {
fn is_missing(value: &ErrorBoundary<ModelCosts>) -> bool {
matches!(
value,
&ErrorBoundary::Ok(ModelCosts{ version, ref costs }) if version == 0 && costs.is_empty()
Expand Down
1 change: 0 additions & 1 deletion relay-dynamic-config/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@
)]
#![allow(clippy::derive_partial_eq_without_eq)]

mod ai;
mod defaults;
mod error_boundary;
mod feature;
Expand Down
117 changes: 115 additions & 2 deletions relay-event-normalization/src/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@ use smallvec::SmallVec;
use uuid::Uuid;

use crate::normalize::request;
use crate::span::ai::normalize_ai_measurements;
use crate::span::tag_extraction::extract_span_tags_from_event;
use crate::utils::{self, get_event_user_tag, MAX_DURATION_MOBILE_MS};
use crate::{
breakdowns, event_error, legacy, mechanism, remove_other, schema, span, stacktrace,
transactions, trimming, user_agent, BreakdownsConfig, CombinedMeasurementsConfig, GeoIpLookup,
MaxChars, PerformanceScoreConfig, RawUserAgentInfo, SpanDescriptionRule, TransactionNameConfig,
MaxChars, ModelCosts, PerformanceScoreConfig, RawUserAgentInfo, SpanDescriptionRule,
TransactionNameConfig,
};

/// Configuration for [`normalize_event`].
Expand Down Expand Up @@ -132,6 +134,9 @@ pub struct NormalizationConfig<'a> {
/// Configuration for generating performance score measurements for web vitals
pub performance_score: Option<&'a PerformanceScoreConfig>,

/// Configuration for calculating the cost of AI model runs
pub ai_model_costs: Option<&'a ModelCosts>,

/// An initialized GeoIP lookup.
pub geoip_lookup: Option<&'a GeoIpLookup>,

Expand Down Expand Up @@ -175,6 +180,7 @@ impl<'a> Default for NormalizationConfig<'a> {
span_description_rules: Default::default(),
performance_score: Default::default(),
geoip_lookup: Default::default(),
ai_model_costs: Default::default(),
enable_trimming: false,
measurements: None,
normalize_spans: true,
Expand Down Expand Up @@ -292,6 +298,7 @@ fn normalize(event: &mut Event, meta: &mut Meta, config: &NormalizationConfig) {
config.max_name_and_unit_len,
); // Measurements are part of the metric extraction
normalize_performance_score(event, config.performance_score);
normalize_ai_measurements(event, config.ai_model_costs);
normalize_breakdowns(event, config.breakdowns_config); // Breakdowns are part of the metric extraction too
normalize_default_attributes(event, meta, config);

Expand Down Expand Up @@ -1411,14 +1418,15 @@ mod tests {

use insta::{assert_debug_snapshot, assert_json_snapshot};
use itertools::Itertools;
use relay_common::glob2::LazyGlob;
use relay_event_schema::protocol::{
Breadcrumb, Csp, DebugMeta, DeviceContext, MetricSummary, MetricsSummary, Span, Values,
};
use relay_protocol::{get_value, SerializableAnnotated};
use serde_json::json;

use super::*;
use crate::{ClientHints, MeasurementsConfig};
use crate::{ClientHints, MeasurementsConfig, ModelCost};

const IOS_MOBILE_EVENT: &str = r#"
{
Expand Down Expand Up @@ -2087,6 +2095,111 @@ mod tests {
"###);
}

#[test]
fn test_ai_measurements() {
    // Two AI spans: the first reports only a total token count, the second
    // reports separate prompt/completion token counts.
    let json = r#"
        {
            "spans": [
                {
                    "timestamp": 1702474613.0495,
                    "start_timestamp": 1702474613.0175,
                    "description": "OpenAI ",
                    "op": "ai.chat_completions.openai",
                    "span_id": "9c01bd820a083e63",
                    "parent_span_id": "a1e13f3f06239d69",
                    "trace_id": "922dda2462ea4ac2b6a4b339bee90863",
                    "measurements": {
                        "ai_total_tokens_used": {
                            "value": 1230
                        }
                    },
                    "data": {
                        "ai.pipeline.name": "Autofix Pipeline",
                        "ai.model_id": "claude-2.1"
                    }
                },
                {
                    "timestamp": 1702474613.0495,
                    "start_timestamp": 1702474613.0175,
                    "description": "OpenAI ",
                    "op": "ai.chat_completions.openai",
                    "span_id": "ac01bd820a083e63",
                    "parent_span_id": "a1e13f3f06239d69",
                    "trace_id": "922dda2462ea4ac2b6a4b339bee90863",
                    "measurements": {
                        "ai_prompt_tokens_used": {
                            "value": 1000
                        },
                        "ai_completion_tokens_used": {
                            "value": 2000
                        }
                    },
                    "data": {
                        "ai.pipeline.name": "Autofix Pipeline",
                        "ai.model_id": "gpt4-21-04"
                    }
                }
            ]
        }
    "#;

    let mut annotated_event = Annotated::<Event>::from_json(json).unwrap();

    // Prices: claude-2* at $1/1k tokens (flat), gpt4-21* at $2/1k prompt
    // tokens and $20/1k completion tokens.
    let costs = ModelCosts {
        version: 1,
        costs: vec![
            ModelCost {
                model_id: LazyGlob::new("claude-2*".into()),
                for_completion: false,
                cost_per_1k_tokens: 1.0,
            },
            ModelCost {
                model_id: LazyGlob::new("gpt4-21*".into()),
                for_completion: false,
                cost_per_1k_tokens: 2.0,
            },
            ModelCost {
                model_id: LazyGlob::new("gpt4-21*".into()),
                for_completion: true,
                cost_per_1k_tokens: 20.0,
            },
        ],
    };

    normalize_event(
        &mut annotated_event,
        &NormalizationConfig {
            ai_model_costs: Some(&costs),
            ..NormalizationConfig::default()
        },
    );

    let spans = annotated_event.value().unwrap().spans.value().unwrap();
    assert_eq!(spans.len(), 2);

    // Helper: read the computed `ai_total_cost` measurement of span `index`.
    let total_cost = |index: usize| {
        spans
            .get(index)
            .unwrap()
            .value()
            .unwrap()
            .measurements
            .value()
            .unwrap()
            .get_value("ai_total_cost")
    };

    // 1230 total tokens * $1 / 1k tokens.
    assert_eq!(total_cost(0), Some(1.23));
    // 2000 completion tokens * $20/1k + 1000 prompt tokens * $2/1k.
    assert_eq!(total_cost(1), Some(20.0 * 2.0 + 2.0));
}

#[test]
fn test_apple_high_device_class() {
let mut event = Event {
Expand Down
Loading

0 comments on commit eb94d40

Please sign in to comment.