diff --git a/CHANGELOG.md b/CHANGELOG.md index 1aec97e485..7312fc1403 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,8 @@ - Add AI model costs to global config. ([#3579](https://github.com/getsentry/relay/pull/3579)) - Add support for `event.` in the `Span` `Getter` implementation. ([#3577](https://github.com/getsentry/relay/pull/3577)) - Ensure `chunk_id` and `profiler_id` are UUIDs and sort samples. ([#3588](https://github.com/getsentry/relay/pull/3588)) +- Add a calculated measurement based on the AI model and the tokens used. ([#3554](https://github.com/getsentry/relay/pull/3554)) + ## 24.4.2 diff --git a/relay-cabi/src/processing.rs b/relay-cabi/src/processing.rs index 95ae539e29..c5d3f8b81e 100644 --- a/relay-cabi/src/processing.rs +++ b/relay-cabi/src/processing.rs @@ -273,7 +273,8 @@ pub unsafe extern "C" fn relay_store_normalizer_normalize_event( max_tag_value_length: usize::MAX, span_description_rules: None, performance_score: None, - geoip_lookup: None, // only supported in relay + geoip_lookup: None, // only supported in relay + ai_model_costs: None, // only supported in relay enable_trimming: config.enable_trimming.unwrap_or_default(), measurements: None, normalize_spans: config.normalize_spans, diff --git a/relay-dynamic-config/src/ai.rs b/relay-dynamic-config/src/ai.rs deleted file mode 100644 index a9b340c68c..0000000000 --- a/relay-dynamic-config/src/ai.rs +++ /dev/null @@ -1,73 +0,0 @@ -//! Configuration for measurements generated from AI model instrumentation. - -use relay_common::glob2::LazyGlob; -use serde::{Deserialize, Serialize}; - -const MAX_SUPPORTED_VERSION: u16 = 1; - -#[derive(Clone, Default, Debug, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct ModelCosts { - pub version: u16, - #[serde(skip_serializing_if = "Vec::is_empty")] - pub costs: Vec, -} - -impl ModelCosts { - /// `false` if measurement and metrics extraction should be skipped. - pub fn is_enabled(&self) -> bool { - self.version > 0 && self.version <= MAX_SUPPORTED_VERSION - } - - /// Gets the cost per 1000 tokens, if defined for the given model. - pub fn cost_per_1k_tokens(&self, model_id: &str, for_completion: bool) -> Option { - self.costs - .iter() - .find(|cost| cost.matches(model_id, for_completion)) - .map(|c| c.cost_per_1k_tokens) - } -} - -#[derive(Clone, Debug, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct ModelCost { - model_id: LazyGlob, - for_completion: bool, - cost_per_1k_tokens: f32, -} - -impl ModelCost { - /// `true` if this cost definition matches the given model. - pub fn matches(&self, model_id: &str, for_completion: bool) -> bool { - self.for_completion == for_completion && self.model_id.compiled().is_match(model_id) - } -} - -#[cfg(test)] -mod tests { - use insta::assert_debug_snapshot; - - use super::*; - - #[test] - fn roundtrip() { - let original = r#"{"version":1,"costs":[{"modelId":"babbage-002.ft-*","forCompletion":false,"costPer1kTokens":0.0016}]}"#; - let deserialized: ModelCosts = serde_json::from_str(original).unwrap(); - assert_debug_snapshot!(deserialized, @r###" - ModelCosts { - version: 1, - costs: [ - ModelCost { - model_id: LazyGlob("babbage-002.ft-*"), - for_completion: false, - cost_per_1k_tokens: 0.0016, - }, - ], - } - "###); - - let serialized = serde_json::to_string(&deserialized).unwrap(); - // Patch floating point - assert_eq!(&serialized, original); - } -} diff --git a/relay-dynamic-config/src/defaults.rs b/relay-dynamic-config/src/defaults.rs index c0a6e6006e..6c6140f9f0 100644 --- a/relay-dynamic-config/src/defaults.rs +++ b/relay-dynamic-config/src/defaults.rs @@ -505,6 +505,41 @@ pub fn hardcoded_span_metrics() -> Vec<(String, Vec)> { .always(), // already guarded by condition on metric ], }, + MetricSpec { + category: DataCategory::Span, + mri: "c:spans/ai.total_cost@usd".into(), + field: Some("span.measurements.ai_total_cost.value".into()), + condition: Some(is_ai.clone()), + tags: vec![ + Tag::with_key("span.op") + .from_field("span.sentry_tags.op") + .always(), + Tag::with_key("environment") + .from_field("span.sentry_tags.environment") + .always(), + Tag::with_key("release") + .from_field("span.sentry_tags.release") + .always(), + Tag::with_key("span.origin") + .from_field("span.origin") + .always(), + Tag::with_key("span.category") + .from_field("span.sentry_tags.category") + .always(), // already guarded by condition on metric + Tag::with_key("span.ai.pipeline.group") + .from_field("span.sentry_tags.ai_pipeline_group") + .always(), // already guarded by condition on metric + Tag::with_key("span.description") + .from_field("span.sentry_tags.description") + .always(), // already guarded by condition on metric + Tag::with_key("span.group") + .from_field("span.sentry_tags.group") + .always(), // already guarded by condition on metric + Tag::with_key("span.op") + .from_field("span.sentry_tags.op") + .always(), // already guarded by condition on metric + ], + }, MetricSpec { category: DataCategory::Span, mri: "d:spans/webvital.score.total@ratio".into(), diff --git a/relay-dynamic-config/src/global.rs b/relay-dynamic-config/src/global.rs index fec6b5c6e6..e25eed2d9e 100644 --- a/relay-dynamic-config/src/global.rs +++ b/relay-dynamic-config/src/global.rs @@ -5,14 +5,13 @@ use std::io::BufReader; use std::path::Path; use relay_base_schema::metrics::MetricNamespace; -use relay_event_normalization::MeasurementsConfig; +use relay_event_normalization::{MeasurementsConfig, ModelCosts}; use relay_filter::GenericFiltersConfig; use relay_quotas::Quota; use serde::{de, Deserialize, Serialize}; use serde_json::Value; -use crate::ai::ModelCosts; -use crate::{ai, defaults, ErrorBoundary, MetricExtractionGroup, MetricExtractionGroups}; +use crate::{defaults, ErrorBoundary, MetricExtractionGroup, MetricExtractionGroups}; /// A dynamic configuration for all Relays passed down from Sentry. /// @@ -50,7 +49,7 @@ pub struct GlobalConfig { /// Configuration for AI span measurements. #[serde(skip_serializing_if = "is_missing")] - pub ai_model_costs: ErrorBoundary, + pub ai_model_costs: ErrorBoundary, } impl GlobalConfig { @@ -406,7 +405,7 @@ fn is_ok_and_empty(value: &ErrorBoundary) -> bool { ) } -fn is_missing(value: &ErrorBoundary) -> bool { +fn is_missing(value: &ErrorBoundary) -> bool { matches!( value, &ErrorBoundary::Ok(ModelCosts{ version, ref costs }) if version == 0 && costs.is_empty() diff --git a/relay-dynamic-config/src/lib.rs b/relay-dynamic-config/src/lib.rs index cc84c630d6..212d63cce0 100644 --- a/relay-dynamic-config/src/lib.rs +++ b/relay-dynamic-config/src/lib.rs @@ -61,7 +61,6 @@ )] #![allow(clippy::derive_partial_eq_without_eq)] -mod ai; mod defaults; mod error_boundary; mod feature; diff --git a/relay-event-normalization/src/event.rs b/relay-event-normalization/src/event.rs index 083c3ce35c..47cfda5c3a 100644 --- a/relay-event-normalization/src/event.rs +++ b/relay-event-normalization/src/event.rs @@ -28,12 +28,14 @@ use smallvec::SmallVec; use uuid::Uuid; use crate::normalize::request; +use crate::span::ai::normalize_ai_measurements; use crate::span::tag_extraction::extract_span_tags_from_event; use crate::utils::{self, get_event_user_tag, MAX_DURATION_MOBILE_MS}; use crate::{ breakdowns, event_error, legacy, mechanism, remove_other, schema, span, stacktrace, transactions, trimming, user_agent, BreakdownsConfig, CombinedMeasurementsConfig, GeoIpLookup, - MaxChars, PerformanceScoreConfig, RawUserAgentInfo, SpanDescriptionRule, TransactionNameConfig, + MaxChars, ModelCosts, PerformanceScoreConfig, RawUserAgentInfo, SpanDescriptionRule, + TransactionNameConfig, }; /// Configuration for [`normalize_event`]. @@ -132,6 +134,9 @@ pub struct NormalizationConfig<'a> { /// Configuration for generating performance score measurements for web vitals pub performance_score: Option<&'a PerformanceScoreConfig>, + /// Configuration for calculating the cost of AI model runs + pub ai_model_costs: Option<&'a ModelCosts>, + /// An initialized GeoIP lookup. pub geoip_lookup: Option<&'a GeoIpLookup>, @@ -175,6 +180,7 @@ impl<'a> Default for NormalizationConfig<'a> { span_description_rules: Default::default(), performance_score: Default::default(), geoip_lookup: Default::default(), + ai_model_costs: Default::default(), enable_trimming: false, measurements: None, normalize_spans: true, @@ -292,6 +298,7 @@ fn normalize(event: &mut Event, meta: &mut Meta, config: &NormalizationConfig) { config.max_name_and_unit_len, ); // Measurements are part of the metric extraction normalize_performance_score(event, config.performance_score); + normalize_ai_measurements(event, config.ai_model_costs); normalize_breakdowns(event, config.breakdowns_config); // Breakdowns are part of the metric extraction too normalize_default_attributes(event, meta, config); @@ -1411,6 +1418,7 @@ mod tests { use insta::{assert_debug_snapshot, assert_json_snapshot}; use itertools::Itertools; + use relay_common::glob2::LazyGlob; use relay_event_schema::protocol::{ Breadcrumb, Csp, DebugMeta, DeviceContext, MetricSummary, MetricsSummary, Span, Values, }; @@ -1418,7 +1426,7 @@ mod tests { use serde_json::json; use super::*; - use crate::{ClientHints, MeasurementsConfig}; + use crate::{ClientHints, MeasurementsConfig, ModelCost}; const IOS_MOBILE_EVENT: &str = r#" { @@ -2087,6 +2095,111 @@ mod tests { "###); } + #[test] + fn test_ai_measurements() { + let json = r#" + { + "spans": [ + { + "timestamp": 1702474613.0495, + "start_timestamp": 1702474613.0175, + "description": "OpenAI ", + "op": "ai.chat_completions.openai", + "span_id": "9c01bd820a083e63", + "parent_span_id": "a1e13f3f06239d69", + "trace_id": "922dda2462ea4ac2b6a4b339bee90863", + "measurements": { + "ai_total_tokens_used": { + "value": 1230 + } + }, + "data": { + "ai.pipeline.name": "Autofix Pipeline", + "ai.model_id": "claude-2.1" + } + }, + { + "timestamp": 1702474613.0495, + "start_timestamp": 1702474613.0175, + "description": "OpenAI ", + "op": "ai.chat_completions.openai", + "span_id": "ac01bd820a083e63", + "parent_span_id": "a1e13f3f06239d69", + "trace_id": "922dda2462ea4ac2b6a4b339bee90863", + "measurements": { + "ai_prompt_tokens_used": { + "value": 1000 + }, + "ai_completion_tokens_used": { + "value": 2000 + } + }, + "data": { + "ai.pipeline.name": "Autofix Pipeline", + "ai.model_id": "gpt4-21-04" + } + } + ] + } + "#; + + let mut event = Annotated::::from_json(json).unwrap(); + + normalize_event( + &mut event, + &NormalizationConfig { + ai_model_costs: Some(&ModelCosts { + version: 1, + costs: vec![ + ModelCost { + model_id: LazyGlob::new("claude-2*".into()), + for_completion: false, + cost_per_1k_tokens: 1.0, + }, + ModelCost { + model_id: LazyGlob::new("gpt4-21*".into()), + for_completion: false, + cost_per_1k_tokens: 2.0, + }, + ModelCost { + model_id: LazyGlob::new("gpt4-21*".into()), + for_completion: true, + cost_per_1k_tokens: 20.0, + }, + ], + }), + ..NormalizationConfig::default() + }, + ); + + let spans = event.value().unwrap().spans.value().unwrap(); + assert_eq!(spans.len(), 2); + assert_eq!( + spans + .first() + .unwrap() + .value() + .unwrap() + .measurements + .value() + .unwrap() + .get_value("ai_total_cost"), + Some(1.23) + ); + assert_eq!( + spans + .get(1) + .unwrap() + .value() + .unwrap() + .measurements + .value() + .unwrap() + .get_value("ai_total_cost"), + Some(20.0 * 2.0 + 2.0) + ); + } + #[test] fn test_apple_high_device_class() { let mut event = Event { diff --git a/relay-event-normalization/src/normalize/mod.rs b/relay-event-normalization/src/normalize/mod.rs index edf6b5cae3..57c43b4fa8 100644 --- a/relay-event-normalization/src/normalize/mod.rs +++ b/relay-event-normalization/src/normalize/mod.rs @@ -1,6 +1,7 @@ use std::hash::Hash; use relay_base_schema::metrics::MetricUnit; +use relay_common::glob2::LazyGlob; use relay_event_schema::protocol::{Event, VALID_PLATFORMS}; use relay_protocol::RuleCondition; use serde::{Deserialize, Serialize}; @@ -215,6 +216,51 @@ pub struct PerformanceScoreConfig { pub profiles: Vec, } +/// A mapping of AI model types (like GPT-4) to their respective costs. +#[derive(Clone, Default, Debug, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ModelCosts { + /// The version of the model cost struct + pub version: u16, + + /// The mappings of model ID => cost + #[serde(skip_serializing_if = "Vec::is_empty")] + pub costs: Vec, +} + +impl ModelCosts { + const MAX_SUPPORTED_VERSION: u16 = 1; + + /// `false` if measurement and metrics extraction should be skipped. + pub fn is_enabled(&self) -> bool { + self.version > 0 && self.version <= ModelCosts::MAX_SUPPORTED_VERSION + } + + /// Gets the cost per 1000 tokens, if defined for the given model. + pub fn cost_per_1k_tokens(&self, model_id: &str, for_completion: bool) -> Option { + self.costs + .iter() + .find(|cost| cost.matches(model_id, for_completion)) + .map(|c| c.cost_per_1k_tokens) + } +} + +/// A single mapping of (AI model ID, input/output, cost) +#[derive(Clone, Debug, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ModelCost { + pub(crate) model_id: LazyGlob, + pub(crate) for_completion: bool, + pub(crate) cost_per_1k_tokens: f64, +} + +impl ModelCost { + /// `true` if this cost definition matches the given model. + pub fn matches(&self, model_id: &str, for_completion: bool) -> bool { + self.for_completion == for_completion && self.model_id.compiled().is_match(model_id) + } +} + #[cfg(test)] mod tests { use chrono::{TimeZone, Utc}; @@ -244,6 +290,27 @@ mod tests { use super::*; + #[test] + fn test_model_cost_config() { + let original = r#"{"version":1,"costs":[{"modelId":"babbage-002.ft-*","forCompletion":false,"costPer1kTokens":0.0016}]}"#; + let deserialized: ModelCosts = serde_json::from_str(original).unwrap(); + assert_debug_snapshot!(deserialized, @r###" + ModelCosts { + version: 1, + costs: [ + ModelCost { + model_id: LazyGlob("babbage-002.ft-*"), + for_completion: false, + cost_per_1k_tokens: 0.0016, + }, + ], + } + "###); + + let serialized = serde_json::to_string(&deserialized).unwrap(); + assert_eq!(&serialized, original); + } + #[test] fn test_merge_builtin_measurement_keys() { let foo = BuiltinMeasurementKey::new("foo", MetricUnit::Duration(DurationUnit::Hour)); diff --git a/relay-event-normalization/src/normalize/span/ai.rs b/relay-event-normalization/src/normalize/span/ai.rs new file mode 100644 index 0000000000..afdf72b9e5 --- /dev/null +++ b/relay-event-normalization/src/normalize/span/ai.rs @@ -0,0 +1,91 @@ +//! AI cost calculation. + +use crate::ModelCosts; +use relay_base_schema::metrics::MetricUnit; +use relay_event_schema::protocol::{Event, Measurement, Span}; + +/// Calculated cost is in US dollars. +fn calculate_ai_model_cost( + model_id: &str, + prompt_tokens_used: Option, + completion_tokens_used: Option, + total_tokens_used: Option, + ai_model_costs: &ModelCosts, +) -> Option { + if let Some(prompt_tokens) = prompt_tokens_used { + if let Some(completion_tokens) = completion_tokens_used { + let mut result = 0.0; + if let Some(cost_per_1k) = ai_model_costs.cost_per_1k_tokens(model_id, false) { + result += cost_per_1k * (prompt_tokens / 1000.0) + } + if let Some(cost_per_1k) = ai_model_costs.cost_per_1k_tokens(model_id, true) { + result += cost_per_1k * (completion_tokens / 1000.0) + } + return Some(result); + } + } + if let Some(total_tokens) = total_tokens_used { + ai_model_costs + .cost_per_1k_tokens(model_id, false) + .map(|cost| cost * (total_tokens / 1000.0)) + } else { + None + } +} + +/// Extract the ai_total_cost measurement into the span. +pub fn extract_ai_measurements(span: &mut Span, ai_model_costs: &ModelCosts) { + let Some(span_op) = span.op.value() else { + return; + }; + + if !span_op.starts_with("ai.") { + return; + } + + let Some(measurements) = span.measurements.value() else { + return; + }; + + let total_tokens_used = measurements.get_value("ai_total_tokens_used"); + let prompt_tokens_used = measurements.get_value("ai_prompt_tokens_used"); + let completion_tokens_used = measurements.get_value("ai_completion_tokens_used"); + if let Some(model_id) = span + .data + .value() + .and_then(|d| d.ai_model_id.value()) + .and_then(|val| val.as_str()) + { + if let Some(total_cost) = calculate_ai_model_cost( + model_id, + prompt_tokens_used, + completion_tokens_used, + total_tokens_used, + ai_model_costs, + ) { + span.measurements + .get_or_insert_with(Default::default) + .insert( + "ai_total_cost".to_owned(), + Measurement { + value: total_cost.into(), + unit: MetricUnit::None.into(), + } + .into(), + ); + } + } +} + +/// Extract the ai_total_cost measurements from all of an event's spans +pub fn normalize_ai_measurements(event: &mut Event, model_costs: Option<&ModelCosts>) { + if let Some(model_costs) = model_costs { + if let Some(spans) = event.spans.value_mut() { + for span in spans { + if let Some(mut_span) = span.value_mut() { + extract_ai_measurements(mut_span, model_costs); + } + } + } + } +} diff --git a/relay-event-normalization/src/normalize/span/mod.rs b/relay-event-normalization/src/normalize/span/mod.rs index a1496e5d0c..f4cdf91486 100644 --- a/relay-event-normalization/src/normalize/span/mod.rs +++ b/relay-event-normalization/src/normalize/span/mod.rs @@ -1,5 +1,6 @@ //! Span normalization logic. +pub mod ai; pub mod description; pub mod exclusive_time; pub mod tag_extraction; diff --git a/relay-event-schema/src/protocol/span.rs b/relay-event-schema/src/protocol/span.rs index e95d9eebb6..383251e218 100644 --- a/relay-event-schema/src/protocol/span.rs +++ b/relay-event-schema/src/protocol/span.rs @@ -266,22 +266,14 @@ pub struct SpanData { #[metastructure(field = "ai.pipeline.name")] pub ai_pipeline_name: Annotated, + /// The Model ID of an AI pipeline, e.g., gpt-4 + #[metastructure(field = "ai.model_id")] + pub ai_model_id: Annotated, + /// The input messages to an AI model call #[metastructure(field = "ai.input_messages")] pub ai_input_messages: Annotated, - /// The number of tokens used to generate the response to an AI call - #[metastructure(field = "ai.completion_tokens.used", pii = "false")] - pub ai_completion_tokens_used: Annotated, - - /// The number of tokens used to process a request for an AI call - #[metastructure(field = "ai.prompt_tokens.used", pii = "false")] - pub ai_prompt_tokens_used: Annotated, - - /// The total number of tokens used to for an AI call - #[metastructure(field = "ai.total_tokens.used", pii = "false")] - pub ai_total_tokens_used: Annotated, - /// The responses to an AI model call #[metastructure(field = "ai.responses")] pub ai_responses: Annotated, @@ -625,10 +617,8 @@ mod tests { cache_item_size: ~, http_response_status_code: ~, ai_pipeline_name: ~, + ai_model_id: ~, ai_input_messages: ~, - ai_completion_tokens_used: ~, - ai_prompt_tokens_used: ~, - ai_total_tokens_used: ~, ai_responses: ~, thread_name: ~, segment_name: ~, diff --git a/relay-event-schema/src/protocol/span/convert.rs b/relay-event-schema/src/protocol/span/convert.rs index f6163b24a2..0bfeb32a4d 100644 --- a/relay-event-schema/src/protocol/span/convert.rs +++ b/relay-event-schema/src/protocol/span/convert.rs @@ -290,10 +290,8 @@ mod tests { cache_item_size: ~, http_response_status_code: ~, ai_pipeline_name: ~, + ai_model_id: ~, ai_input_messages: ~, - ai_completion_tokens_used: ~, - ai_prompt_tokens_used: ~, - ai_total_tokens_used: ~, ai_responses: ~, thread_name: ~, segment_name: "my 1st transaction", diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 7850f1481a..9e379db759 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -472,7 +472,7 @@ mod tests { Addr, Breadcrumb, DebugImage, DebugMeta, ExtraValue, Headers, LogEntry, Message, NativeDebugImage, Request, Span, TagEntry, Tags, TraceContext, }; - use relay_protocol::{assert_annotated_snapshot, get_value, FromValue, Object, Val}; + use relay_protocol::{assert_annotated_snapshot, get_value, FromValue, Object}; use serde_json::json; use super::*; @@ -1337,43 +1337,6 @@ mod tests { ); } - #[test] - fn test_ai_token_values() { - let mut span = Span::from_value( - json!({ - "data": { - "ai.total_tokens.used": 30, - "ai.prompt_tokens.used": 20, - "ai.completion_tokens.used": 10, - } - }) - .into(), - ); - - let pii_config = serde_json::from_value::(json!({ - "applications": { - "$object": ["@password"], - } - })) - .expect("invalid json config"); - - let mut pii_processor = PiiProcessor::new(pii_config.compiled()); - process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap(); - - assert_eq!( - Val::from(get_value!(span.data.ai_total_tokens_used!)).as_u64(), - Some(30), - ); - assert_eq!( - Val::from(get_value!(span.data.ai_prompt_tokens_used!)).as_u64(), - Some(20), - ); - assert_eq!( - Val::from(get_value!(span.data.ai_completion_tokens_used!)).as_u64(), - Some(10), - ); - } - #[test] fn test_scrub_breadcrumb_data_http_not_scrubbed() { let mut breadcrumb: Annotated = Annotated::from_json( diff --git a/relay-server/src/metrics_extraction/event.rs b/relay-server/src/metrics_extraction/event.rs index 8858eb33a4..83718c1659 100644 --- a/relay-server/src/metrics_extraction/event.rs +++ b/relay-server/src/metrics_extraction/event.rs @@ -1124,6 +1124,9 @@ mod tests { "measurements": { "ai_total_tokens_used": { "value": 20 + }, + "ai_total_cost": { + "value": 0.0002 } }, "data": { diff --git a/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics.snap b/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics.snap index 77fa81b97c..2c7aa8cb4d 100644 --- a/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics.snap +++ b/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics.snap @@ -7526,6 +7526,32 @@ expression: metrics ), }, }, + Bucket { + timestamp: UnixTimestamp(1702474613), + width: 0, + name: MetricName( + "c:spans/ai.total_cost@usd", + ), + value: Counter( + 0.0002, + ), + tags: { + "environment": "fake_environment", + "release": "1.2.3", + "span.ai.pipeline.group": "86148ae2d6c09430", + "span.category": "ai", + "span.description": "ConcurrentStream", + "span.group": "fdd5a729aef245ba", + "span.op": "ai.run.langchain", + "span.origin": "auto.langchain", + }, + metadata: BucketMetadata { + merges: 1, + received_at: Some( + UnixTimestamp(0), + ), + }, + }, Bucket { timestamp: UnixTimestamp(1702474613), width: 0, diff --git a/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics_mobile.snap b/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics_mobile.snap index bef9b78b0f..0f8b07cf51 100644 --- a/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics_mobile.snap +++ b/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics_mobile.snap @@ -96,10 +96,8 @@ expression: "(&event.value().unwrap().spans, metrics)" cache_item_size: ~, http_response_status_code: ~, ai_pipeline_name: ~, + ai_model_id: ~, ai_input_messages: ~, - ai_completion_tokens_used: ~, - ai_prompt_tokens_used: ~, - ai_total_tokens_used: ~, ai_responses: ~, thread_name: ~, segment_name: ~, @@ -413,10 +411,8 @@ expression: "(&event.value().unwrap().spans, metrics)" cache_item_size: ~, http_response_status_code: ~, ai_pipeline_name: ~, + ai_model_id: ~, ai_input_messages: ~, - ai_completion_tokens_used: ~, - ai_prompt_tokens_used: ~, - ai_total_tokens_used: ~, ai_responses: ~, thread_name: ~, segment_name: ~, @@ -508,10 +504,8 @@ expression: "(&event.value().unwrap().spans, metrics)" cache_item_size: ~, http_response_status_code: ~, ai_pipeline_name: ~, + ai_model_id: ~, ai_input_messages: ~, - ai_completion_tokens_used: ~, - ai_prompt_tokens_used: ~, - ai_total_tokens_used: ~, ai_responses: ~, thread_name: ~, segment_name: ~, @@ -651,10 +645,8 @@ expression: "(&event.value().unwrap().spans, metrics)" "200", ), ai_pipeline_name: ~, + ai_model_id: ~, ai_input_messages: ~, - ai_completion_tokens_used: ~, - ai_prompt_tokens_used: ~, - ai_total_tokens_used: ~, ai_responses: ~, thread_name: ~, segment_name: ~, @@ -746,10 +738,8 @@ expression: "(&event.value().unwrap().spans, metrics)" 200, ), ai_pipeline_name: ~, + ai_model_id: ~, ai_input_messages: ~, - ai_completion_tokens_used: ~, - ai_prompt_tokens_used: ~, - ai_total_tokens_used: ~, ai_responses: ~, thread_name: ~, segment_name: ~, diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index db2025f83c..a9c7b427ee 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -1318,6 +1318,7 @@ impl EnvelopeProcessorService { .aggregator_config_for(MetricNamespace::Transactions); let global_config = self.inner.global_config.current(); + let ai_model_costs = global_config.ai_model_costs.clone().ok(); utils::log_transaction_name_metrics(&mut state.event, |event| { let tx_validation_config = TransactionValidationConfig { @@ -1387,6 +1388,7 @@ impl EnvelopeProcessorService { emit_event_errors: full_normalization, span_description_rules: state.project_state.config.span_description_rules.as_ref(), geoip_lookup: self.inner.geoip_lookup.as_ref(), + ai_model_costs: ai_model_costs.as_ref(), enable_trimming: true, measurements: Some(CombinedMeasurementsConfig::new( state.project_state.config().measurements.as_ref(), diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index d7f7eb4123..3779a96b76 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -9,12 +9,12 @@ use relay_config::Config; use relay_dynamic_config::{ CombinedMetricExtractionConfig, ErrorBoundary, Feature, GlobalConfig, ProjectConfig, }; -use relay_event_normalization::normalize_transaction_name; use relay_event_normalization::{ normalize_measurements, normalize_performance_score, normalize_user_agent_info_generic, span::tag_extraction, validate_span, CombinedMeasurementsConfig, MeasurementsConfig, PerformanceScoreConfig, RawUserAgentInfo, TransactionsProcessor, }; +use relay_event_normalization::{normalize_transaction_name, ModelCosts}; use relay_event_schema::processor::{process_value, ProcessingState}; use relay_event_schema::protocol::{BrowserContext, Contexts, Event, Span, SpanData}; use relay_log::protocol::{Attachment, AttachmentType}; @@ -33,6 +33,7 @@ use crate::services::processor::{ }; use crate::statsd::{RelayCounters, RelayHistograms}; use crate::utils::{sample, BufferGuard, ItemAction}; +use relay_event_normalization::span::ai::extract_ai_measurements; use thiserror::Error; #[derive(Error, Debug)] @@ -52,12 +53,14 @@ pub fn process( ErrorBoundary::Ok(ref config) if config.is_enabled() => Some(config), _ => None, }; + let ai_model_costs_config = global_config.ai_model_costs.clone().ok(); let normalize_span_config = get_normalize_span_config( config, state.managed_envelope.received_at(), global_config.measurements.as_ref(), state.project_state.config().measurements.as_ref(), state.project_state.config().performance_score.as_ref(), + ai_model_costs_config.as_ref(), ); let meta = state.managed_envelope.envelope().meta(); @@ -382,6 +385,8 @@ struct NormalizeSpanConfig<'a> { /// If at least one is provided, then normalization will truncate custom measurements /// and add units of known built-in measurements. measurements: Option>, + /// Configuration for AI model cost calculation + ai_model_costs: Option<&'a ModelCosts>, /// The maximum length for names of custom measurements. /// /// Measurements with longer names are removed from the transaction event and replaced with a @@ -395,6 +400,7 @@ fn get_normalize_span_config<'a>( global_measurements_config: Option<&'a MeasurementsConfig>, project_measurements_config: Option<&'a MeasurementsConfig>, performance_score: Option<&'a PerformanceScoreConfig>, + ai_model_costs: Option<&'a ModelCosts>, ) -> NormalizeSpanConfig<'a> { let aggregator_config = AggregatorConfig::from(config.aggregator_config_for(MetricNamespace::Spans)); @@ -415,6 +421,7 @@ fn get_normalize_span_config<'a>( .saturating_sub(MeasurementsConfig::MEASUREMENT_MRI_OVERHEAD), ), performance_score, + ai_model_costs, } } @@ -464,6 +471,7 @@ fn normalize( max_tag_value_size, performance_score, measurements, + ai_model_costs, max_name_and_unit_len, } = config; @@ -543,6 +551,9 @@ fn normalize( ..Default::default() }; normalize_performance_score(&mut event, performance_score); + if let Some(model_costs_config) = ai_model_costs { + extract_ai_measurements(span, model_costs_config); + } span.measurements = event.measurements; tag_extraction::extract_measurements(span, is_mobile); diff --git a/relay-spans/src/span.rs b/relay-spans/src/span.rs index 965a65d008..be37386c32 100644 --- a/relay-spans/src/span.rs +++ b/relay-spans/src/span.rs @@ -646,10 +646,8 @@ mod tests { cache_item_size: ~, http_response_status_code: ~, ai_pipeline_name: ~, + ai_model_id: ~, ai_input_messages: ~, - ai_completion_tokens_used: ~, - ai_prompt_tokens_used: ~, - ai_total_tokens_used: ~, ai_responses: ~, thread_name: ~, segment_name: "my 1st transaction",