From eb94d405e435db615578f167b9edfeb42723eeab Mon Sep 17 00:00:00 2001 From: colin-sentry <161344340+colin-sentry@users.noreply.github.com> Date: Tue, 14 May 2024 10:40:21 -0400 Subject: [PATCH] feat(ai-monitoring): Calculate an AI model cost metric per-span (#3554) AI models have different prices, for example OpenAI's implementation of GPT-4 costs 30$ per million tokens of input, and 60$ per million tokens of output. We'd like to be able to make graphs of how much money you've spent on your AI projects, grouped by, e.g., AI pipeline name. It's messy to put this logic in SDKs, as we can't update those once launched. If we put the logic in Sentry, then any price changes would retroactively apply. --------- Co-authored-by: Joris Bayer --- CHANGELOG.md | 2 + relay-cabi/src/processing.rs | 3 +- relay-dynamic-config/src/ai.rs | 73 ----------- relay-dynamic-config/src/defaults.rs | 35 ++++++ relay-dynamic-config/src/global.rs | 9 +- relay-dynamic-config/src/lib.rs | 1 - relay-event-normalization/src/event.rs | 117 +++++++++++++++++- .../src/normalize/mod.rs | 67 ++++++++++ .../src/normalize/span/ai.rs | 91 ++++++++++++++ .../src/normalize/span/mod.rs | 1 + relay-event-schema/src/protocol/span.rs | 20 +-- .../src/protocol/span/convert.rs | 4 +- relay-pii/src/processor.rs | 39 +----- relay-server/src/metrics_extraction/event.rs | 3 + ...n__event__tests__extract_span_metrics.snap | 26 ++++ ...t__tests__extract_span_metrics_mobile.snap | 20 +-- relay-server/src/services/processor.rs | 2 + .../src/services/processor/span/processing.rs | 13 +- relay-spans/src/span.rs | 4 +- 19 files changed, 373 insertions(+), 157 deletions(-) delete mode 100644 relay-dynamic-config/src/ai.rs create mode 100644 relay-event-normalization/src/normalize/span/ai.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 1aec97e485..7312fc1403 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,8 @@ - Add AI model costs to global config. 
([#3579](https://github.com/getsentry/relay/pull/3579)) - Add support for `event.` in the `Span` `Getter` implementation. ([#3577](https://github.com/getsentry/relay/pull/3577)) - Ensure `chunk_id` and `profiler_id` are UUIDs and sort samples. ([#3588](https://github.com/getsentry/relay/pull/3588)) +- Add a calculated measurement based on the AI model and the tokens used. ([#3554](https://github.com/getsentry/relay/pull/3554)) + ## 24.4.2 diff --git a/relay-cabi/src/processing.rs b/relay-cabi/src/processing.rs index 95ae539e29..c5d3f8b81e 100644 --- a/relay-cabi/src/processing.rs +++ b/relay-cabi/src/processing.rs @@ -273,7 +273,8 @@ pub unsafe extern "C" fn relay_store_normalizer_normalize_event( max_tag_value_length: usize::MAX, span_description_rules: None, performance_score: None, - geoip_lookup: None, // only supported in relay + geoip_lookup: None, // only supported in relay + ai_model_costs: None, // only supported in relay enable_trimming: config.enable_trimming.unwrap_or_default(), measurements: None, normalize_spans: config.normalize_spans, diff --git a/relay-dynamic-config/src/ai.rs b/relay-dynamic-config/src/ai.rs deleted file mode 100644 index a9b340c68c..0000000000 --- a/relay-dynamic-config/src/ai.rs +++ /dev/null @@ -1,73 +0,0 @@ -//! Configuration for measurements generated from AI model instrumentation. - -use relay_common::glob2::LazyGlob; -use serde::{Deserialize, Serialize}; - -const MAX_SUPPORTED_VERSION: u16 = 1; - -#[derive(Clone, Default, Debug, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct ModelCosts { - pub version: u16, - #[serde(skip_serializing_if = "Vec::is_empty")] - pub costs: Vec, -} - -impl ModelCosts { - /// `false` if measurement and metrics extraction should be skipped. - pub fn is_enabled(&self) -> bool { - self.version > 0 && self.version <= MAX_SUPPORTED_VERSION - } - - /// Gets the cost per 1000 tokens, if defined for the given model. 
- pub fn cost_per_1k_tokens(&self, model_id: &str, for_completion: bool) -> Option { - self.costs - .iter() - .find(|cost| cost.matches(model_id, for_completion)) - .map(|c| c.cost_per_1k_tokens) - } -} - -#[derive(Clone, Debug, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct ModelCost { - model_id: LazyGlob, - for_completion: bool, - cost_per_1k_tokens: f32, -} - -impl ModelCost { - /// `true` if this cost definition matches the given model. - pub fn matches(&self, model_id: &str, for_completion: bool) -> bool { - self.for_completion == for_completion && self.model_id.compiled().is_match(model_id) - } -} - -#[cfg(test)] -mod tests { - use insta::assert_debug_snapshot; - - use super::*; - - #[test] - fn roundtrip() { - let original = r#"{"version":1,"costs":[{"modelId":"babbage-002.ft-*","forCompletion":false,"costPer1kTokens":0.0016}]}"#; - let deserialized: ModelCosts = serde_json::from_str(original).unwrap(); - assert_debug_snapshot!(deserialized, @r###" - ModelCosts { - version: 1, - costs: [ - ModelCost { - model_id: LazyGlob("babbage-002.ft-*"), - for_completion: false, - cost_per_1k_tokens: 0.0016, - }, - ], - } - "###); - - let serialized = serde_json::to_string(&deserialized).unwrap(); - // Patch floating point - assert_eq!(&serialized, original); - } -} diff --git a/relay-dynamic-config/src/defaults.rs b/relay-dynamic-config/src/defaults.rs index c0a6e6006e..6c6140f9f0 100644 --- a/relay-dynamic-config/src/defaults.rs +++ b/relay-dynamic-config/src/defaults.rs @@ -505,6 +505,41 @@ pub fn hardcoded_span_metrics() -> Vec<(String, Vec)> { .always(), // already guarded by condition on metric ], }, + MetricSpec { + category: DataCategory::Span, + mri: "c:spans/ai.total_cost@usd".into(), + field: Some("span.measurements.ai_total_cost.value".into()), + condition: Some(is_ai.clone()), + tags: vec![ + Tag::with_key("span.op") + .from_field("span.sentry_tags.op") + .always(), + Tag::with_key("environment") + 
.from_field("span.sentry_tags.environment") + .always(), + Tag::with_key("release") + .from_field("span.sentry_tags.release") + .always(), + Tag::with_key("span.origin") + .from_field("span.origin") + .always(), + Tag::with_key("span.category") + .from_field("span.sentry_tags.category") + .always(), // already guarded by condition on metric + Tag::with_key("span.ai.pipeline.group") + .from_field("span.sentry_tags.ai_pipeline_group") + .always(), // already guarded by condition on metric + Tag::with_key("span.description") + .from_field("span.sentry_tags.description") + .always(), // already guarded by condition on metric + Tag::with_key("span.group") + .from_field("span.sentry_tags.group") + .always(), // already guarded by condition on metric + Tag::with_key("span.op") + .from_field("span.sentry_tags.op") + .always(), // already guarded by condition on metric + ], + }, MetricSpec { category: DataCategory::Span, mri: "d:spans/webvital.score.total@ratio".into(), diff --git a/relay-dynamic-config/src/global.rs b/relay-dynamic-config/src/global.rs index fec6b5c6e6..e25eed2d9e 100644 --- a/relay-dynamic-config/src/global.rs +++ b/relay-dynamic-config/src/global.rs @@ -5,14 +5,13 @@ use std::io::BufReader; use std::path::Path; use relay_base_schema::metrics::MetricNamespace; -use relay_event_normalization::MeasurementsConfig; +use relay_event_normalization::{MeasurementsConfig, ModelCosts}; use relay_filter::GenericFiltersConfig; use relay_quotas::Quota; use serde::{de, Deserialize, Serialize}; use serde_json::Value; -use crate::ai::ModelCosts; -use crate::{ai, defaults, ErrorBoundary, MetricExtractionGroup, MetricExtractionGroups}; +use crate::{defaults, ErrorBoundary, MetricExtractionGroup, MetricExtractionGroups}; /// A dynamic configuration for all Relays passed down from Sentry. /// @@ -50,7 +49,7 @@ pub struct GlobalConfig { /// Configuration for AI span measurements. 
#[serde(skip_serializing_if = "is_missing")] - pub ai_model_costs: ErrorBoundary, + pub ai_model_costs: ErrorBoundary, } impl GlobalConfig { @@ -406,7 +405,7 @@ fn is_ok_and_empty(value: &ErrorBoundary) -> bool { ) } -fn is_missing(value: &ErrorBoundary) -> bool { +fn is_missing(value: &ErrorBoundary) -> bool { matches!( value, &ErrorBoundary::Ok(ModelCosts{ version, ref costs }) if version == 0 && costs.is_empty() diff --git a/relay-dynamic-config/src/lib.rs b/relay-dynamic-config/src/lib.rs index cc84c630d6..212d63cce0 100644 --- a/relay-dynamic-config/src/lib.rs +++ b/relay-dynamic-config/src/lib.rs @@ -61,7 +61,6 @@ )] #![allow(clippy::derive_partial_eq_without_eq)] -mod ai; mod defaults; mod error_boundary; mod feature; diff --git a/relay-event-normalization/src/event.rs b/relay-event-normalization/src/event.rs index 083c3ce35c..47cfda5c3a 100644 --- a/relay-event-normalization/src/event.rs +++ b/relay-event-normalization/src/event.rs @@ -28,12 +28,14 @@ use smallvec::SmallVec; use uuid::Uuid; use crate::normalize::request; +use crate::span::ai::normalize_ai_measurements; use crate::span::tag_extraction::extract_span_tags_from_event; use crate::utils::{self, get_event_user_tag, MAX_DURATION_MOBILE_MS}; use crate::{ breakdowns, event_error, legacy, mechanism, remove_other, schema, span, stacktrace, transactions, trimming, user_agent, BreakdownsConfig, CombinedMeasurementsConfig, GeoIpLookup, - MaxChars, PerformanceScoreConfig, RawUserAgentInfo, SpanDescriptionRule, TransactionNameConfig, + MaxChars, ModelCosts, PerformanceScoreConfig, RawUserAgentInfo, SpanDescriptionRule, + TransactionNameConfig, }; /// Configuration for [`normalize_event`]. 
@@ -132,6 +134,9 @@ pub struct NormalizationConfig<'a> { /// Configuration for generating performance score measurements for web vitals pub performance_score: Option<&'a PerformanceScoreConfig>, + /// Configuration for calculating the cost of AI model runs + pub ai_model_costs: Option<&'a ModelCosts>, + /// An initialized GeoIP lookup. pub geoip_lookup: Option<&'a GeoIpLookup>, @@ -175,6 +180,7 @@ impl<'a> Default for NormalizationConfig<'a> { span_description_rules: Default::default(), performance_score: Default::default(), geoip_lookup: Default::default(), + ai_model_costs: Default::default(), enable_trimming: false, measurements: None, normalize_spans: true, @@ -292,6 +298,7 @@ fn normalize(event: &mut Event, meta: &mut Meta, config: &NormalizationConfig) { config.max_name_and_unit_len, ); // Measurements are part of the metric extraction normalize_performance_score(event, config.performance_score); + normalize_ai_measurements(event, config.ai_model_costs); normalize_breakdowns(event, config.breakdowns_config); // Breakdowns are part of the metric extraction too normalize_default_attributes(event, meta, config); @@ -1411,6 +1418,7 @@ mod tests { use insta::{assert_debug_snapshot, assert_json_snapshot}; use itertools::Itertools; + use relay_common::glob2::LazyGlob; use relay_event_schema::protocol::{ Breadcrumb, Csp, DebugMeta, DeviceContext, MetricSummary, MetricsSummary, Span, Values, }; @@ -1418,7 +1426,7 @@ mod tests { use serde_json::json; use super::*; - use crate::{ClientHints, MeasurementsConfig}; + use crate::{ClientHints, MeasurementsConfig, ModelCost}; const IOS_MOBILE_EVENT: &str = r#" { @@ -2087,6 +2095,111 @@ mod tests { "###); } + #[test] + fn test_ai_measurements() { + let json = r#" + { + "spans": [ + { + "timestamp": 1702474613.0495, + "start_timestamp": 1702474613.0175, + "description": "OpenAI ", + "op": "ai.chat_completions.openai", + "span_id": "9c01bd820a083e63", + "parent_span_id": "a1e13f3f06239d69", + "trace_id": 
"922dda2462ea4ac2b6a4b339bee90863", + "measurements": { + "ai_total_tokens_used": { + "value": 1230 + } + }, + "data": { + "ai.pipeline.name": "Autofix Pipeline", + "ai.model_id": "claude-2.1" + } + }, + { + "timestamp": 1702474613.0495, + "start_timestamp": 1702474613.0175, + "description": "OpenAI ", + "op": "ai.chat_completions.openai", + "span_id": "ac01bd820a083e63", + "parent_span_id": "a1e13f3f06239d69", + "trace_id": "922dda2462ea4ac2b6a4b339bee90863", + "measurements": { + "ai_prompt_tokens_used": { + "value": 1000 + }, + "ai_completion_tokens_used": { + "value": 2000 + } + }, + "data": { + "ai.pipeline.name": "Autofix Pipeline", + "ai.model_id": "gpt4-21-04" + } + } + ] + } + "#; + + let mut event = Annotated::::from_json(json).unwrap(); + + normalize_event( + &mut event, + &NormalizationConfig { + ai_model_costs: Some(&ModelCosts { + version: 1, + costs: vec![ + ModelCost { + model_id: LazyGlob::new("claude-2*".into()), + for_completion: false, + cost_per_1k_tokens: 1.0, + }, + ModelCost { + model_id: LazyGlob::new("gpt4-21*".into()), + for_completion: false, + cost_per_1k_tokens: 2.0, + }, + ModelCost { + model_id: LazyGlob::new("gpt4-21*".into()), + for_completion: true, + cost_per_1k_tokens: 20.0, + }, + ], + }), + ..NormalizationConfig::default() + }, + ); + + let spans = event.value().unwrap().spans.value().unwrap(); + assert_eq!(spans.len(), 2); + assert_eq!( + spans + .first() + .unwrap() + .value() + .unwrap() + .measurements + .value() + .unwrap() + .get_value("ai_total_cost"), + Some(1.23) + ); + assert_eq!( + spans + .get(1) + .unwrap() + .value() + .unwrap() + .measurements + .value() + .unwrap() + .get_value("ai_total_cost"), + Some(20.0 * 2.0 + 2.0) + ); + } + #[test] fn test_apple_high_device_class() { let mut event = Event { diff --git a/relay-event-normalization/src/normalize/mod.rs b/relay-event-normalization/src/normalize/mod.rs index edf6b5cae3..57c43b4fa8 100644 --- a/relay-event-normalization/src/normalize/mod.rs +++ 
b/relay-event-normalization/src/normalize/mod.rs @@ -1,6 +1,7 @@ use std::hash::Hash; use relay_base_schema::metrics::MetricUnit; +use relay_common::glob2::LazyGlob; use relay_event_schema::protocol::{Event, VALID_PLATFORMS}; use relay_protocol::RuleCondition; use serde::{Deserialize, Serialize}; @@ -215,6 +216,51 @@ pub struct PerformanceScoreConfig { pub profiles: Vec, } +/// A mapping of AI model types (like GPT-4) to their respective costs. +#[derive(Clone, Default, Debug, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ModelCosts { + /// The version of the model cost struct + pub version: u16, + + /// The mappings of model ID => cost + #[serde(skip_serializing_if = "Vec::is_empty")] + pub costs: Vec, +} + +impl ModelCosts { + const MAX_SUPPORTED_VERSION: u16 = 1; + + /// `false` if measurement and metrics extraction should be skipped. + pub fn is_enabled(&self) -> bool { + self.version > 0 && self.version <= ModelCosts::MAX_SUPPORTED_VERSION + } + + /// Gets the cost per 1000 tokens, if defined for the given model. + pub fn cost_per_1k_tokens(&self, model_id: &str, for_completion: bool) -> Option { + self.costs + .iter() + .find(|cost| cost.matches(model_id, for_completion)) + .map(|c| c.cost_per_1k_tokens) + } +} + +/// A single mapping of (AI model ID, input/output, cost) +#[derive(Clone, Debug, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ModelCost { + pub(crate) model_id: LazyGlob, + pub(crate) for_completion: bool, + pub(crate) cost_per_1k_tokens: f64, +} + +impl ModelCost { + /// `true` if this cost definition matches the given model. 
+ pub fn matches(&self, model_id: &str, for_completion: bool) -> bool { + self.for_completion == for_completion && self.model_id.compiled().is_match(model_id) + } +} + #[cfg(test)] mod tests { use chrono::{TimeZone, Utc}; @@ -244,6 +290,27 @@ mod tests { use super::*; + #[test] + fn test_model_cost_config() { + let original = r#"{"version":1,"costs":[{"modelId":"babbage-002.ft-*","forCompletion":false,"costPer1kTokens":0.0016}]}"#; + let deserialized: ModelCosts = serde_json::from_str(original).unwrap(); + assert_debug_snapshot!(deserialized, @r###" + ModelCosts { + version: 1, + costs: [ + ModelCost { + model_id: LazyGlob("babbage-002.ft-*"), + for_completion: false, + cost_per_1k_tokens: 0.0016, + }, + ], + } + "###); + + let serialized = serde_json::to_string(&deserialized).unwrap(); + assert_eq!(&serialized, original); + } + #[test] fn test_merge_builtin_measurement_keys() { let foo = BuiltinMeasurementKey::new("foo", MetricUnit::Duration(DurationUnit::Hour)); diff --git a/relay-event-normalization/src/normalize/span/ai.rs b/relay-event-normalization/src/normalize/span/ai.rs new file mode 100644 index 0000000000..afdf72b9e5 --- /dev/null +++ b/relay-event-normalization/src/normalize/span/ai.rs @@ -0,0 +1,91 @@ +//! AI cost calculation. + +use crate::ModelCosts; +use relay_base_schema::metrics::MetricUnit; +use relay_event_schema::protocol::{Event, Measurement, Span}; + +/// Calculated cost is in US dollars. 
+fn calculate_ai_model_cost( + model_id: &str, + prompt_tokens_used: Option, + completion_tokens_used: Option, + total_tokens_used: Option, + ai_model_costs: &ModelCosts, +) -> Option { + if let Some(prompt_tokens) = prompt_tokens_used { + if let Some(completion_tokens) = completion_tokens_used { + let mut result = 0.0; + if let Some(cost_per_1k) = ai_model_costs.cost_per_1k_tokens(model_id, false) { + result += cost_per_1k * (prompt_tokens / 1000.0) + } + if let Some(cost_per_1k) = ai_model_costs.cost_per_1k_tokens(model_id, true) { + result += cost_per_1k * (completion_tokens / 1000.0) + } + return Some(result); + } + } + if let Some(total_tokens) = total_tokens_used { + ai_model_costs + .cost_per_1k_tokens(model_id, false) + .map(|cost| cost * (total_tokens / 1000.0)) + } else { + None + } +} + +/// Extract the ai_total_cost measurement into the span. +pub fn extract_ai_measurements(span: &mut Span, ai_model_costs: &ModelCosts) { + let Some(span_op) = span.op.value() else { + return; + }; + + if !span_op.starts_with("ai.") { + return; + } + + let Some(measurements) = span.measurements.value() else { + return; + }; + + let total_tokens_used = measurements.get_value("ai_total_tokens_used"); + let prompt_tokens_used = measurements.get_value("ai_prompt_tokens_used"); + let completion_tokens_used = measurements.get_value("ai_completion_tokens_used"); + if let Some(model_id) = span + .data + .value() + .and_then(|d| d.ai_model_id.value()) + .and_then(|val| val.as_str()) + { + if let Some(total_cost) = calculate_ai_model_cost( + model_id, + prompt_tokens_used, + completion_tokens_used, + total_tokens_used, + ai_model_costs, + ) { + span.measurements + .get_or_insert_with(Default::default) + .insert( + "ai_total_cost".to_owned(), + Measurement { + value: total_cost.into(), + unit: MetricUnit::None.into(), + } + .into(), + ); + } + } +} + +/// Extract the ai_total_cost measurements from all of an event's spans +pub fn normalize_ai_measurements(event: &mut Event, 
model_costs: Option<&ModelCosts>) { + if let Some(model_costs) = model_costs { + if let Some(spans) = event.spans.value_mut() { + for span in spans { + if let Some(mut_span) = span.value_mut() { + extract_ai_measurements(mut_span, model_costs); + } + } + } + } +} diff --git a/relay-event-normalization/src/normalize/span/mod.rs b/relay-event-normalization/src/normalize/span/mod.rs index a1496e5d0c..f4cdf91486 100644 --- a/relay-event-normalization/src/normalize/span/mod.rs +++ b/relay-event-normalization/src/normalize/span/mod.rs @@ -1,5 +1,6 @@ //! Span normalization logic. +pub mod ai; pub mod description; pub mod exclusive_time; pub mod tag_extraction; diff --git a/relay-event-schema/src/protocol/span.rs b/relay-event-schema/src/protocol/span.rs index e95d9eebb6..383251e218 100644 --- a/relay-event-schema/src/protocol/span.rs +++ b/relay-event-schema/src/protocol/span.rs @@ -266,22 +266,14 @@ pub struct SpanData { #[metastructure(field = "ai.pipeline.name")] pub ai_pipeline_name: Annotated, + /// The Model ID of an AI pipeline, e.g., gpt-4 + #[metastructure(field = "ai.model_id")] + pub ai_model_id: Annotated, + /// The input messages to an AI model call #[metastructure(field = "ai.input_messages")] pub ai_input_messages: Annotated, - /// The number of tokens used to generate the response to an AI call - #[metastructure(field = "ai.completion_tokens.used", pii = "false")] - pub ai_completion_tokens_used: Annotated, - - /// The number of tokens used to process a request for an AI call - #[metastructure(field = "ai.prompt_tokens.used", pii = "false")] - pub ai_prompt_tokens_used: Annotated, - - /// The total number of tokens used to for an AI call - #[metastructure(field = "ai.total_tokens.used", pii = "false")] - pub ai_total_tokens_used: Annotated, - /// The responses to an AI model call #[metastructure(field = "ai.responses")] pub ai_responses: Annotated, @@ -625,10 +617,8 @@ mod tests { cache_item_size: ~, http_response_status_code: ~, ai_pipeline_name: ~, + 
ai_model_id: ~, ai_input_messages: ~, - ai_completion_tokens_used: ~, - ai_prompt_tokens_used: ~, - ai_total_tokens_used: ~, ai_responses: ~, thread_name: ~, segment_name: ~, diff --git a/relay-event-schema/src/protocol/span/convert.rs b/relay-event-schema/src/protocol/span/convert.rs index f6163b24a2..0bfeb32a4d 100644 --- a/relay-event-schema/src/protocol/span/convert.rs +++ b/relay-event-schema/src/protocol/span/convert.rs @@ -290,10 +290,8 @@ mod tests { cache_item_size: ~, http_response_status_code: ~, ai_pipeline_name: ~, + ai_model_id: ~, ai_input_messages: ~, - ai_completion_tokens_used: ~, - ai_prompt_tokens_used: ~, - ai_total_tokens_used: ~, ai_responses: ~, thread_name: ~, segment_name: "my 1st transaction", diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 7850f1481a..9e379db759 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -472,7 +472,7 @@ mod tests { Addr, Breadcrumb, DebugImage, DebugMeta, ExtraValue, Headers, LogEntry, Message, NativeDebugImage, Request, Span, TagEntry, Tags, TraceContext, }; - use relay_protocol::{assert_annotated_snapshot, get_value, FromValue, Object, Val}; + use relay_protocol::{assert_annotated_snapshot, get_value, FromValue, Object}; use serde_json::json; use super::*; @@ -1337,43 +1337,6 @@ mod tests { ); } - #[test] - fn test_ai_token_values() { - let mut span = Span::from_value( - json!({ - "data": { - "ai.total_tokens.used": 30, - "ai.prompt_tokens.used": 20, - "ai.completion_tokens.used": 10, - } - }) - .into(), - ); - - let pii_config = serde_json::from_value::(json!({ - "applications": { - "$object": ["@password"], - } - })) - .expect("invalid json config"); - - let mut pii_processor = PiiProcessor::new(pii_config.compiled()); - process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap(); - - assert_eq!( - Val::from(get_value!(span.data.ai_total_tokens_used!)).as_u64(), - Some(30), - ); - assert_eq!( - 
Val::from(get_value!(span.data.ai_prompt_tokens_used!)).as_u64(), - Some(20), - ); - assert_eq!( - Val::from(get_value!(span.data.ai_completion_tokens_used!)).as_u64(), - Some(10), - ); - } - #[test] fn test_scrub_breadcrumb_data_http_not_scrubbed() { let mut breadcrumb: Annotated = Annotated::from_json( diff --git a/relay-server/src/metrics_extraction/event.rs b/relay-server/src/metrics_extraction/event.rs index 8858eb33a4..83718c1659 100644 --- a/relay-server/src/metrics_extraction/event.rs +++ b/relay-server/src/metrics_extraction/event.rs @@ -1124,6 +1124,9 @@ mod tests { "measurements": { "ai_total_tokens_used": { "value": 20 + }, + "ai_total_cost": { + "value": 0.0002 } }, "data": { diff --git a/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics.snap b/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics.snap index 77fa81b97c..2c7aa8cb4d 100644 --- a/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics.snap +++ b/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics.snap @@ -7526,6 +7526,32 @@ expression: metrics ), }, }, + Bucket { + timestamp: UnixTimestamp(1702474613), + width: 0, + name: MetricName( + "c:spans/ai.total_cost@usd", + ), + value: Counter( + 0.0002, + ), + tags: { + "environment": "fake_environment", + "release": "1.2.3", + "span.ai.pipeline.group": "86148ae2d6c09430", + "span.category": "ai", + "span.description": "ConcurrentStream", + "span.group": "fdd5a729aef245ba", + "span.op": "ai.run.langchain", + "span.origin": "auto.langchain", + }, + metadata: BucketMetadata { + merges: 1, + received_at: Some( + UnixTimestamp(0), + ), + }, + }, Bucket { timestamp: UnixTimestamp(1702474613), width: 0, diff --git 
a/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics_mobile.snap b/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics_mobile.snap index bef9b78b0f..0f8b07cf51 100644 --- a/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics_mobile.snap +++ b/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics_mobile.snap @@ -96,10 +96,8 @@ expression: "(&event.value().unwrap().spans, metrics)" cache_item_size: ~, http_response_status_code: ~, ai_pipeline_name: ~, + ai_model_id: ~, ai_input_messages: ~, - ai_completion_tokens_used: ~, - ai_prompt_tokens_used: ~, - ai_total_tokens_used: ~, ai_responses: ~, thread_name: ~, segment_name: ~, @@ -413,10 +411,8 @@ expression: "(&event.value().unwrap().spans, metrics)" cache_item_size: ~, http_response_status_code: ~, ai_pipeline_name: ~, + ai_model_id: ~, ai_input_messages: ~, - ai_completion_tokens_used: ~, - ai_prompt_tokens_used: ~, - ai_total_tokens_used: ~, ai_responses: ~, thread_name: ~, segment_name: ~, @@ -508,10 +504,8 @@ expression: "(&event.value().unwrap().spans, metrics)" cache_item_size: ~, http_response_status_code: ~, ai_pipeline_name: ~, + ai_model_id: ~, ai_input_messages: ~, - ai_completion_tokens_used: ~, - ai_prompt_tokens_used: ~, - ai_total_tokens_used: ~, ai_responses: ~, thread_name: ~, segment_name: ~, @@ -651,10 +645,8 @@ expression: "(&event.value().unwrap().spans, metrics)" "200", ), ai_pipeline_name: ~, + ai_model_id: ~, ai_input_messages: ~, - ai_completion_tokens_used: ~, - ai_prompt_tokens_used: ~, - ai_total_tokens_used: ~, ai_responses: ~, thread_name: ~, segment_name: ~, @@ -746,10 +738,8 @@ expression: "(&event.value().unwrap().spans, metrics)" 200, ), ai_pipeline_name: ~, + ai_model_id: ~, ai_input_messages: ~, - ai_completion_tokens_used: ~, - 
ai_prompt_tokens_used: ~, - ai_total_tokens_used: ~, ai_responses: ~, thread_name: ~, segment_name: ~, diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index db2025f83c..a9c7b427ee 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -1318,6 +1318,7 @@ impl EnvelopeProcessorService { .aggregator_config_for(MetricNamespace::Transactions); let global_config = self.inner.global_config.current(); + let ai_model_costs = global_config.ai_model_costs.clone().ok(); utils::log_transaction_name_metrics(&mut state.event, |event| { let tx_validation_config = TransactionValidationConfig { @@ -1387,6 +1388,7 @@ impl EnvelopeProcessorService { emit_event_errors: full_normalization, span_description_rules: state.project_state.config.span_description_rules.as_ref(), geoip_lookup: self.inner.geoip_lookup.as_ref(), + ai_model_costs: ai_model_costs.as_ref(), enable_trimming: true, measurements: Some(CombinedMeasurementsConfig::new( state.project_state.config().measurements.as_ref(), diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index d7f7eb4123..3779a96b76 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -9,12 +9,12 @@ use relay_config::Config; use relay_dynamic_config::{ CombinedMetricExtractionConfig, ErrorBoundary, Feature, GlobalConfig, ProjectConfig, }; -use relay_event_normalization::normalize_transaction_name; use relay_event_normalization::{ normalize_measurements, normalize_performance_score, normalize_user_agent_info_generic, span::tag_extraction, validate_span, CombinedMeasurementsConfig, MeasurementsConfig, PerformanceScoreConfig, RawUserAgentInfo, TransactionsProcessor, }; +use relay_event_normalization::{normalize_transaction_name, ModelCosts}; use relay_event_schema::processor::{process_value, ProcessingState}; use 
relay_event_schema::protocol::{BrowserContext, Contexts, Event, Span, SpanData}; use relay_log::protocol::{Attachment, AttachmentType}; @@ -33,6 +33,7 @@ use crate::services::processor::{ }; use crate::statsd::{RelayCounters, RelayHistograms}; use crate::utils::{sample, BufferGuard, ItemAction}; +use relay_event_normalization::span::ai::extract_ai_measurements; use thiserror::Error; #[derive(Error, Debug)] @@ -52,12 +53,14 @@ pub fn process( ErrorBoundary::Ok(ref config) if config.is_enabled() => Some(config), _ => None, }; + let ai_model_costs_config = global_config.ai_model_costs.clone().ok(); let normalize_span_config = get_normalize_span_config( config, state.managed_envelope.received_at(), global_config.measurements.as_ref(), state.project_state.config().measurements.as_ref(), state.project_state.config().performance_score.as_ref(), + ai_model_costs_config.as_ref(), ); let meta = state.managed_envelope.envelope().meta(); @@ -382,6 +385,8 @@ struct NormalizeSpanConfig<'a> { /// If at least one is provided, then normalization will truncate custom measurements /// and add units of known built-in measurements. measurements: Option>, + /// Configuration for AI model cost calculation + ai_model_costs: Option<&'a ModelCosts>, /// The maximum length for names of custom measurements. 
/// /// Measurements with longer names are removed from the transaction event and replaced with a @@ -395,6 +400,7 @@ fn get_normalize_span_config<'a>( global_measurements_config: Option<&'a MeasurementsConfig>, project_measurements_config: Option<&'a MeasurementsConfig>, performance_score: Option<&'a PerformanceScoreConfig>, + ai_model_costs: Option<&'a ModelCosts>, ) -> NormalizeSpanConfig<'a> { let aggregator_config = AggregatorConfig::from(config.aggregator_config_for(MetricNamespace::Spans)); @@ -415,6 +421,7 @@ fn get_normalize_span_config<'a>( .saturating_sub(MeasurementsConfig::MEASUREMENT_MRI_OVERHEAD), ), performance_score, + ai_model_costs, } } @@ -464,6 +471,7 @@ fn normalize( max_tag_value_size, performance_score, measurements, + ai_model_costs, max_name_and_unit_len, } = config; @@ -543,6 +551,9 @@ fn normalize( ..Default::default() }; normalize_performance_score(&mut event, performance_score); + if let Some(model_costs_config) = ai_model_costs { + extract_ai_measurements(span, model_costs_config); + } span.measurements = event.measurements; tag_extraction::extract_measurements(span, is_mobile); diff --git a/relay-spans/src/span.rs b/relay-spans/src/span.rs index 965a65d008..be37386c32 100644 --- a/relay-spans/src/span.rs +++ b/relay-spans/src/span.rs @@ -646,10 +646,8 @@ mod tests { cache_item_size: ~, http_response_status_code: ~, ai_pipeline_name: ~, + ai_model_id: ~, ai_input_messages: ~, - ai_completion_tokens_used: ~, - ai_prompt_tokens_used: ~, - ai_total_tokens_used: ~, ai_responses: ~, thread_name: ~, segment_name: "my 1st transaction",