# Patch summary: add a LENGTH(<string>) function to the query engine.
#
# - functions.rs: new unit struct `Length` implementing `MapOp<&str, i64>`;
#   `apply` returns `s.len() as i64`, `name()` returns "length".
#   NOTE(review): `str::len` counts bytes, not characters, so non-ASCII strings
#   report a value larger than their character count - confirm this is intended.
# - vector_operator.rs: `VecOperator::length` boxes a `MapOperator` whose `map` is `Length`.
# - query_plan.rs: new `QueryPlan::Length { string, length }` variant; the
#   `Func1Type::Length` planner arm decodes the argument when a codec is present,
#   bails with `QueryError::TypeError` unless `t.decoded == BasicType::String`,
#   then plans `length` over the decoded string buffer. `encoding_range` yields
#   `None` for `Length`, and `prepare` lowers `QueryPlan::Length` to `VecOperator::length`.
# - expression.rs / parser.rs: new `Func1Type::Length`; the parser accepts "LENGTH"
#   with exactly one argument and returns a `ParseError` otherwise.
# - tests/query_tests.rs: `test_string_length` orders by `length(pickup_ntaname)` DESC.
#
# NOTE(review): this copy of the patch is flattened onto three lines and some generic
# arguments look stripped (e.g. `BufferRef`, `Vec>`, `Result, QueryError>`) - presumably
# an extraction artifact; regenerate from the repository before applying. TODO confirm.
diff --git a/src/engine/operators/functions.rs b/src/engine/operators/functions.rs index 682dc806..fc4aaf19 100644 --- a/src/engine/operators/functions.rs +++ b/src/engine/operators/functions.rs @@ -35,3 +35,11 @@ impl<'a> MapOp<&'a str, u8> for RegexMatch { } fn name() -> &'static str { "not" } } + + +pub struct Length; + +impl<'a> MapOp<&'a str, i64> for Length { + fn apply(&self, s: &'a str) -> i64 { s.len() as i64 } + fn name() -> &'static str { "length" } +} diff --git a/src/engine/operators/vector_operator.rs b/src/engine/operators/vector_operator.rs index 4ff95828..c835d81d 100644 --- a/src/engine/operators/vector_operator.rs +++ b/src/engine/operators/vector_operator.rs @@ -614,6 +614,10 @@ impl<'a> VecOperator<'a> { Box::new(MapOperator { input, output, map: RegexMatch { r: regex::Regex::new(r).unwrap() } }) } + pub fn length(input: BufferRef<&'a str>, output: BufferRef) -> BoxedOperator<'a> { + Box::new(MapOperator { input, output, map: Length }) + } + pub fn aggregate(input: TypedBufferRef, grouping: TypedBufferRef, max_index: BufferRef>, diff --git a/src/engine/planning/query_plan.rs b/src/engine/planning/query_plan.rs index b4a28c22..444c768f 100644 --- a/src/engine/planning/query_plan.rs +++ b/src/engine/planning/query_plan.rs @@ -339,6 +339,11 @@ pub enum QueryPlan { #[output] matches: BufferRef, }, + Length { + string: BufferRef<&'static str>, + #[output] + length: BufferRef, + }, /// Outputs a vector of indices from `0..plan.len()` Indices { plan: TypedBufferRef, @@ -806,6 +811,16 @@ impl QueryPlan { } planner.to_year(decoded).into() } + Func1Type::Length => { + let decoded = match t.codec.clone() { + Some(codec) => codec.decode(plan, planner), + None => plan, + }; + if t.decoded != BasicType::String { + bail!(QueryError::TypeError, "Found length({:?}), expected length(string)", &t) + } + planner.length(decoded.str()?).into() + } Func1Type::Not => { let decoded = match t.codec.clone() { Some(codec) => codec.decode(plan, planner), @@ -867,7 +882,7 
@@ fn encoding_range(plan: &TypedBufferRef, qp: &QueryPlanner) -> Option<(i64, i64) LZ4Decode { bytes, .. } => encoding_range(&bytes.into(), qp), DeltaDecode { ref plan, .. } => encoding_range(plan, qp), AssembleNullable { ref data, .. } => encoding_range(data, qp), - UnpackStrings { .. } | UnhexpackStrings { .. } => None, + UnpackStrings { .. } | UnhexpackStrings { .. } | Length { .. } => None, ref plan => { // TODO(clemens): many more cases where we can determine range error!("encoding_range not implement for {:?}", plan); @@ -1151,6 +1166,7 @@ pub fn prepare<'a>(plan: QueryPlan, constant_vecs: &mut Vec>, resu QueryPlan::Not { input, not } => VecOperator::not(input, not), QueryPlan::ToYear { timestamp, year } => VecOperator::to_year(timestamp.i64()?, year.i64()?), QueryPlan::Regex { plan, regex, matches } => VecOperator::regex(plan, &regex, matches), + QueryPlan::Length { string, length } => VecOperator::length(string, length), QueryPlan::Indices { plan, indices } => VecOperator::indices(plan, indices), QueryPlan::SortBy { ranking, indices, desc, stable, permutation } => VecOperator::sort_by(ranking, indices, desc, stable, permutation)?, QueryPlan::TopN { ranking, n, desc, tmp_keys, top_n } => VecOperator::top_n(ranking, tmp_keys, n, desc, top_n)?, diff --git a/src/syntax/expression.rs b/src/syntax/expression.rs index 96cb4a8b..2e9e98b6 100644 --- a/src/syntax/expression.rs +++ b/src/syntax/expression.rs @@ -38,6 +38,7 @@ pub enum Func1Type { Not, IsNull, IsNotNull, + Length, } impl Expr { diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs index 663f2b26..65a7fdf6 100644 --- a/src/syntax/parser.rs +++ b/src/syntax/parser.rs @@ -119,6 +119,13 @@ fn expr(node: &ASTNode) -> Result, QueryError> { } Expr::Func2(Func2Type::RegexMatch, expr(&args[0])?, expr(&args[1])?) } + "LENGTH" => { + if args.len() != 1 { + return Err(QueryError::ParseError( + "Expected one argument in length function".to_string())); + } + Expr::Func1(Func1Type::Length, expr(&args[0])?) 
+ } "COUNT" => { if args.len() != 1 { return Err(QueryError::ParseError( diff --git a/tests/query_tests.rs b/tests/query_tests.rs index 5cba600e..036cd433 100644 --- a/tests/query_tests.rs +++ b/tests/query_tests.rs @@ -527,6 +527,21 @@ fn test_group_by_trip_id() { ) } +#[test] +fn test_string_length() { + test_query_nyc( + "SELECT length(pickup_ntaname), pickup_ntaname, COUNT(0) + FROM default + ORDER BY length(pickup_ntaname) DESC + LIMIT 3;", + &[ + vec![Int(56), Str("Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill"), Int(1)], + vec![Int(50), Str("Mariner\'s Harbor-Arlington-Port Ivory-Graniteville"), Int(3)], + vec![Int(48), Str("DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill"), Int(245)], + ], + ) +} + #[test] fn test_group_by_negative_expression() { test_query_ec(