diff --git a/Cargo.lock b/Cargo.lock index b1ee7a10bae..cc8b3ccae6e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1801,7 +1801,6 @@ name = "icu_pattern" version = "0.1.5" dependencies = [ "displaydoc", - "iai", "writeable", ] diff --git a/utils/pattern/Cargo.toml b/utils/pattern/Cargo.toml index f467a06bf47..424a4d37e13 100644 --- a/utils/pattern/Cargo.toml +++ b/utils/pattern/Cargo.toml @@ -24,9 +24,6 @@ all-features = true displaydoc = { version = "0.2.3", default-features = false } writeable = { workspace = true } -[dev-dependencies] -iai = "0.1" - -[[bench]] -name = "parser_iai" -harness = false +[features] +alloc = [] +std = ["alloc"] diff --git a/utils/pattern/README.md b/utils/pattern/README.md index 631433abbdb..27ea0c2ca13 100644 --- a/utils/pattern/README.md +++ b/utils/pattern/README.md @@ -4,101 +4,30 @@ `icu_pattern` is a utility crate of the [`ICU4X`] project. -It includes a [`Pattern`] struct which wraps a paid of [`Parser`] and [`Interpolator`] allowing for parsing and interpolation of ICU placeholder patterns, like "{0} days" or -"{0}, {1}" with custom elements and string literals. +It includes a [`Pattern`] type which supports patterns with various storage backends. -## Placeholders & Elements - -The [`Parser`] is generic over any `Placeholder` which implements [`FromStr`] -allowing the consumer to parse placeholder patterns such as "{0}, {1}", -"{date}, {time}" or any other. - -The [`Interpolator`] can interpolate the [`Pattern`] against any -iterator over `Element`. +The types are tightly coupled with the [`writeable`] crate. ## Examples -In the following example we're going to use a custom `Token` type, -and an `Element` type which will be either a `Token` or a string slice. - -For the purpose of the example, a higher level -[`interpolate_to_string`](Pattern::interpolate_to_string) method -is being used. 
+Parsing and interpolating with a single-placeholder pattern: ```rust -use icu_pattern::Pattern; -use std::{borrow::Cow, convert::TryInto, fmt::Display}; - -#[derive(Debug, PartialEq)] -enum ExampleToken { - Year, - Month, - Day, - Hour, - Minute, -} - -impl Display for ExampleToken { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "[{:?}]", self) - } -} +use icu_pattern::SinglePlaceholderPattern; +use writeable::assert_writeable_eq; -#[derive(Debug, PartialEq)] -enum ExampleElement<'s> { - Token(ExampleToken), - Literal(Cow<'s, str>), -} +// Parse a pattern string: +let pattern = "Hello, {0}!" + .parse::<SinglePlaceholderPattern<_>>() + .unwrap(); -impl Display for ExampleElement<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Token(token) => token.fmt(f), - Self::Literal(lit) => lit.fmt(f), - } - } -} +// Interpolate into the pattern string: +assert_writeable_eq!(pattern.interpolate(["World"]), "Hello, World!"); -let pattern: Pattern = - "{0}, {1}".try_into().expect("Failed to parse a pattern."); - -let replacements = vec![ - vec![ - ExampleElement::Token(ExampleToken::Year), - ExampleElement::Literal("-".into()), - ExampleElement::Token(ExampleToken::Month), - ExampleElement::Literal("-".into()), - ExampleElement::Token(ExampleToken::Day), - ], - vec![ - ExampleElement::Token(ExampleToken::Hour), - ExampleElement::Literal(":".into()), - ExampleElement::Token(ExampleToken::Minute), - ], -]; - -assert_eq!( - pattern - .interpolate_to_string::(&replacements) - .expect("Failed to interpolate a pattern."), - "[Year]-[Month]-[Day], [Hour]:[Minute]" -); +// Introspect the serialized form of the pattern string: +assert_eq!(pattern.take_store(), "\x08Hello, !"); ``` ## Combinators - -In the example above, the replacements will be parsed at compile time and stored on a [`Vec`], -which is a collection type that has an implementation for [`ReplacementProvider`] -trait. 
- -In real use, the consumer may want to use different models of replacement provider, -and different element schemas. -Because the replacement is an iterator itself, it allows for other, more specialized parsers, -to be used to lazily parse particular patterns that are meant to replace the placeholders. -This allows for lazy parsing of those specialized patterns to be triggered -only if the placeholder pattern encounters a placeholder key that requires given -pattern to be used. - [`ICU4X`]: ../icu/index.html [`FromStr`]: std::str::FromStr diff --git a/utils/pattern/benches/parser_iai.rs b/utils/pattern/benches/parser_iai.rs deleted file mode 100644 index 659d2ebe9c2..00000000000 --- a/utils/pattern/benches/parser_iai.rs +++ /dev/null @@ -1,281 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -use icu_pattern::*; -use std::{borrow::Cow, convert::TryInto, fmt::Display}; - -#[derive(Debug)] -struct Token; - -impl std::fmt::Display for Token { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{self:?}") - } -} - -fn iai_parse() { - let samples = vec![ - ("{0} - {1}", vec![vec!["Hello"], vec!["World"]]), - ("{1} - {0}", vec![vec!["Hello"], vec!["World"]]), - ( - "{0}, {1} 'and' {2}", - vec![vec!["Start"], vec!["Middle"], vec!["End"]], - ), - ("{0} 'at' {1}", vec![vec!["Hello"], vec!["World"]]), - ]; - - for sample in &samples { - let mut p = Parser::::new( - sample.0, - ParserOptions { - allow_raw_letters: false, - }, - ); - while p.try_next().unwrap().is_some() {} - } -} - -#[derive(Debug)] -pub enum Element<'s> { - Literal(Cow<'s, str>), -} - -impl Display for Element<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Literal(s) => f.write_str(s), - } - } -} - -impl<'s> From<&'s str> for Element<'s> { - fn from(input: &'s 
str) -> Self { - Self::Literal(input.into()) - } -} - -fn iai_interpolate() { - let samples = vec![ - ("{0} - {1}", vec!["Hello", "World"]), - ("{1} - {0}", vec!["Hello", "World"]), - ("{0}, {1} 'and' {2}", vec!["Start", "Middle", "End"]), - ("{0} 'at' {1}", vec!["Hello", "World"]), - ]; - - for sample in &samples { - let pattern: Pattern = Parser::new( - sample.0, - ParserOptions { - allow_raw_letters: false, - }, - ) - .try_into() - .unwrap(); - - let replacements: Vec = sample.1.iter().map(|r| Element::from(*r)).collect(); - - let _ = pattern.interpolate_to_string(&replacements).unwrap(); - } -} - -fn iai_parsed_interpolate() { - let samples = &[ - ( - vec![ - PatternToken::Placeholder(0), - PatternToken::Literal { - content: " - ".into(), - quoted: false, - }, - PatternToken::Placeholder(1), - ] - .into(), - vec!["Hello", "World"], - ), - ( - vec![ - PatternToken::Placeholder(1), - PatternToken::Literal { - content: " - ".into(), - quoted: false, - }, - PatternToken::Placeholder(0), - ] - .into(), - vec!["Hello", "World"], - ), - ( - vec![ - PatternToken::Placeholder(0), - PatternToken::Literal { - content: ", ".into(), - quoted: false, - }, - PatternToken::Placeholder(1), - PatternToken::Literal { - content: " ".into(), - quoted: false, - }, - PatternToken::Literal { - content: "and".into(), - quoted: true, - }, - PatternToken::Literal { - content: " ".into(), - quoted: false, - }, - PatternToken::Placeholder(2), - ] - .into(), - vec!["Start", "Middle", "End"], - ), - ( - vec![ - PatternToken::Placeholder(0), - PatternToken::Literal { - content: " ".into(), - quoted: false, - }, - PatternToken::Literal { - content: "at".into(), - quoted: true, - }, - PatternToken::Literal { - content: " ".into(), - quoted: false, - }, - PatternToken::Placeholder(1), - ] - .into(), - vec!["Hello", "World"], - ), - ]; - - for sample in samples { - let pattern: &Pattern = &sample.0; - - let replacements: Vec = sample.1.iter().map(|r| Element::from(*r)).collect(); - - let _ = 
pattern.interpolate_to_string(&replacements).unwrap(); - } -} - -fn iai_parsed_interpolate_composed() { - let samples = &[ - ( - vec![ - PatternToken::Placeholder(0), - PatternToken::Literal { - content: " - ".into(), - quoted: false, - }, - PatternToken::Placeholder(1), - ] - .into(), - vec!["Hello", "World"], - ), - ( - vec![ - PatternToken::Placeholder(1), - PatternToken::Literal { - content: " - ".into(), - quoted: false, - }, - PatternToken::Placeholder(0), - ] - .into(), - vec!["Hello", "World"], - ), - ( - vec![ - PatternToken::Placeholder(0), - PatternToken::Literal { - content: ", ".into(), - quoted: false, - }, - PatternToken::Placeholder(1), - PatternToken::Literal { - content: " ".into(), - quoted: false, - }, - PatternToken::Literal { - content: "and".into(), - quoted: true, - }, - PatternToken::Literal { - content: " ".into(), - quoted: false, - }, - PatternToken::Placeholder(2), - ] - .into(), - vec!["Start", "Middle", "End"], - ), - ( - vec![ - PatternToken::Placeholder(0), - PatternToken::Literal { - content: " ".into(), - quoted: false, - }, - PatternToken::Literal { - content: "at".into(), - quoted: true, - }, - PatternToken::Literal { - content: " ".into(), - quoted: false, - }, - PatternToken::Placeholder(1), - ] - .into(), - vec!["Hello", "World"], - ), - ]; - - for sample in samples { - let pattern: &Pattern = &sample.0; - - let replacements: Vec> = - sample.1.iter().map(|r| vec![Element::from(*r)]).collect(); - - let _ = pattern - .interpolate_to_string::<'_, Element, _>(&replacements) - .unwrap(); - } -} - -fn iai_named_interpolate() { - let named_samples = vec![( - "{start}, {middle} 'and' {end}", - vec![("start", "Start"), ("middle", "Middle"), ("end", "End")], - )]; - - for sample in &named_samples { - let pattern: Pattern<_> = Parser::new( - sample.0, - ParserOptions { - allow_raw_letters: false, - }, - ) - .try_into() - .unwrap(); - - let replacements: std::collections::HashMap = sample - .1 - .iter() - .map(|&(k, v)| (k.to_owned(), 
Element::from(v))) - .collect(); - - let _ = pattern.interpolate_to_string(&replacements).unwrap(); - } -} - -iai::main!( - iai_parse, - iai_interpolate, - iai_parsed_interpolate, - iai_parsed_interpolate_composed, - iai_named_interpolate -); diff --git a/utils/pattern/examples/borrowed_pattern.rs b/utils/pattern/examples/borrowed_pattern.rs deleted file mode 100644 index d0fb8a62f0f..00000000000 --- a/utils/pattern/examples/borrowed_pattern.rs +++ /dev/null @@ -1,46 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -use icu_pattern::{Pattern, PatternToken}; -use std::fmt::Display; - -#[derive(Debug)] -enum Element { - Token(usize), -} - -impl Display for Element { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Token(n) => write!(f, "{n}"), - } - } -} - -struct Data<'s> { - placeholder_pattern: Pattern<'s, usize>, - replacement_patterns: Vec, -} - -fn main() { - let data = Data { - placeholder_pattern: vec![ - PatternToken::Placeholder(0), - PatternToken::Literal { - content: " days".into(), - quoted: false, - }, - ] - .into(), - replacement_patterns: vec![Element::Token(5)], - }; - - let interpolated_pattern = data - .placeholder_pattern - .interpolate(&data.replacement_patterns) - .expect("Failed to interpolate."); - let result = interpolated_pattern.to_string(); - - assert_eq!(result, "5 days"); -} diff --git a/utils/pattern/examples/owned_pattern.rs b/utils/pattern/examples/owned_pattern.rs deleted file mode 100644 index 4350ec3c84a..00000000000 --- a/utils/pattern/examples/owned_pattern.rs +++ /dev/null @@ -1,33 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
- -use icu_pattern::Pattern; -use std::{convert::TryInto, fmt::Display}; - -#[derive(Debug)] -enum Element { - Token(usize), -} - -impl Display for Element { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Token(n) => write!(f, "{n}"), - } - } -} - -fn main() { - let replacements = vec![Element::Token(5)]; - - let pattern: Pattern = "{0} 'days'".try_into().expect("Failed to parse a pattern"); - - let interpolated_pattern = pattern - .interpolate(&replacements) - .expect("Failed to interpolate a pattern"); - - let result = interpolated_pattern.to_string(); - - assert_eq!(result, "5 days"); -} diff --git a/utils/pattern/src/builder.rs b/utils/pattern/src/builder.rs new file mode 100644 index 00000000000..cc676695cb2 --- /dev/null +++ b/utils/pattern/src/builder.rs @@ -0,0 +1,32 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use core::{fmt, str::FromStr}; + +use crate::{ParsedPatternItem, Parser, PatternError, PatternItemCow}; + +impl<'a, K> From> for PatternItemCow<'a, K> { + fn from(value: ParsedPatternItem<'a, K>) -> Self { + match value { + ParsedPatternItem::Literal { content, .. } => PatternItemCow::Literal(content), + ParsedPatternItem::Placeholder(key) => PatternItemCow::Placeholder(key), + } + } +} + +impl<'a, K> Iterator for Parser<'a, K> +where + K: FromStr, + K::Err: fmt::Debug, +{ + type Item = Result, PatternError>; + + fn next(&mut self) -> Option { + match self.try_next() { + Ok(Some(pattern_token)) => Some(Ok(pattern_token.into())), + Ok(None) => None, + Err(_e) => Some(Err(PatternError::InvalidPattern)), + } + } +} diff --git a/utils/pattern/src/common.rs b/utils/pattern/src/common.rs new file mode 100644 index 00000000000..86e43d59221 --- /dev/null +++ b/utils/pattern/src/common.rs @@ -0,0 +1,105 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::Error; +use writeable::Writeable; + +#[cfg(feature = "alloc")] +use alloc::{borrow::Cow, borrow::ToOwned}; + +/// A borrowed item in a [`Pattern`]. Items are either string literals or placeholders. +/// +/// [`Pattern`]: crate::Pattern +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +#[allow(clippy::exhaustive_enums)] // Part of core data model +pub enum PatternItem<'a, T> { + /// A placeholder of the type specified on this [`PatternItem`]. + Placeholder(T), + /// A string literal. This can occur in one of three places: + /// + /// 1. Between the start of the string and the first placeholder (prefix) + /// 2. Between two placeholders (infix) + /// 3. Between the final placeholder and the end of the string (suffix) + Literal(&'a str), +} + +/// A borrowed-or-owned item in a [`Pattern`]. Items are either string literals or placeholders. +/// +/// ✨ *Enabled with the `alloc` Cargo feature.* +/// +/// [`Pattern`]: crate::Pattern +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +#[allow(clippy::exhaustive_enums)] // Part of core data model +#[cfg(feature = "alloc")] +pub enum PatternItemCow<'a, T> { + /// A placeholder of the type specified on this [`PatternItemCow`]. + Placeholder(T), + /// A string literal. This can occur in one of three places: + /// + /// 1. Between the start of the string and the first placeholder (prefix) + /// 2. Between two placeholders (infix) + /// 3. Between the final placeholder and the end of the string (suffix) + Literal(Cow<'a, str>), +} + +/// Types that implement backing data models for [`Pattern`] implement this trait. +/// +/// The trait has no public methods and is not implementable outside of this crate. 
+/// +/// [`Pattern`]: crate::Pattern +pub trait PatternBackend: crate::private::Sealed { + /// The type to be used as the placeholder key in code. + type PlaceholderKey; + + /// The unsized type of the store required for this backend, usually `str` or `[u8]`. + type Store: ?Sized; + + /// The iterator type returned by [`Self::try_from_items`]. + #[doc(hidden)] // TODO(#4467): Should be internal + type Iter<'a>: Iterator> + where + Self: 'a; + + /// Checks a store for validity, returning an error if invalid. + #[doc(hidden)] // TODO(#4467): Should be internal + fn validate_store(store: &Self::Store) -> Result<(), Error>; + + /// Constructs a store from pattern items. + #[doc(hidden)] + // TODO(#4467): Should be internal + // Note: it is not good practice to feature-gate trait methods, but this trait is sealed + #[cfg(feature = "alloc")] + fn try_from_items< + 'a, + I: Iterator, Error>>, + >( + items: I, + ) -> Result<::Owned, Error> + where + Self: 'a, + Self::Store: ToOwned; + + /// Iterates over the pattern items in a store. + #[doc(hidden)] // TODO(#4467): Should be internal + fn iter_items(store: &Self::Store) -> Self::Iter<'_>; +} + +pub trait PlaceholderValueProvider { + type W<'a>: Writeable + where + Self: 'a; + + /// Returns the [`Writeable`] to substitute for the given placeholder. + fn value_for(&self, key: K) -> Self::W<'_>; +} + +impl<'b, K, T> PlaceholderValueProvider for &'b T +where + T: PlaceholderValueProvider + ?Sized, +{ + type W<'a> = T::W<'a> where T: 'a, 'b: 'a; + fn value_for(&self, key: K) -> Self::W<'_> { + (*self).value_for(key) + } +} diff --git a/utils/pattern/src/error.rs b/utils/pattern/src/error.rs new file mode 100644 index 00000000000..5be1405c113 --- /dev/null +++ b/utils/pattern/src/error.rs @@ -0,0 +1,15 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use displaydoc::Display; + +#[derive(Debug, Display)] +#[non_exhaustive] +pub enum PatternError { + #[displaydoc("Syntax error in pattern string or invalid serialized pattern")] + InvalidPattern, +} + +#[cfg(feature = "std")] +impl std::error::Error for PatternError {} diff --git a/utils/pattern/src/frontend.rs b/utils/pattern/src/frontend.rs new file mode 100644 index 00000000000..8bd2b27b9e4 --- /dev/null +++ b/utils/pattern/src/frontend.rs @@ -0,0 +1,377 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use core::{ + fmt::{self, Write}, + marker::PhantomData, +}; + +use writeable::{PartsWrite, Writeable}; + +use crate::common::*; +use crate::Error; + +#[cfg(feature = "alloc")] +use crate::{Parser, ParserOptions}; +#[cfg(feature = "alloc")] +use alloc::{borrow::ToOwned, str::FromStr, string::String}; + +/// A string pattern with placeholders. +/// +/// There are 2 generic parameters: `Backend` and `Store`. +/// +/// # Backend +/// +/// This determines the nature of placeholders and serialized encoding of the pattern. +/// +/// The following backends are available: +/// +/// - [`SinglePlaceholder`] for patterns with one placeholder: `"{0} days ago"` +/// +/// # Store +/// +/// The data structure has a flexible backing data store. The only requirement for most +/// functionality is that it implement `AsRef` (backend-dependent). 
+/// +/// Example stores: +/// +/// - `&str` for a fully borrowed pattern +/// - `String` for a fully owned pattern +/// - `Cow` for an owned-or-borrowed pattern +/// +/// [`SinglePlaceholder`]: crate::SinglePlaceholder +#[derive(Debug)] +pub struct Pattern { + _backend: PhantomData, + store: Store, +} + +impl Pattern { + pub fn take_store(self) -> Store { + self.store + } +} + +impl Pattern +where + B: PatternBackend, + Store: AsRef, +{ + /// Creates a pattern from a serialized backing store. + /// + /// To parse a pattern string, use [`Self::try_from_str()`]. + /// + /// # Examples + /// + /// ``` + /// use icu_pattern::Pattern; + /// use icu_pattern::SinglePlaceholder; + /// + /// // Create a pattern from a valid store: + /// Pattern::::try_from_store("\x01 days") + /// .expect("valid pattern"); + /// + /// // Error on an invalid pattern: + /// Pattern::::try_from_store("\x09 days") + /// .expect_err("9 is out of bounds"); + /// ``` + pub fn try_from_store(store: Store) -> Result { + B::validate_store(store.as_ref())?; + Ok(Self { + _backend: PhantomData, + store, + }) + } +} + +#[cfg(feature = "alloc")] +impl Pattern::Owned> +where + B: PatternBackend, + B::Store: ToOwned, +{ + /// Creates a pattern from an iterator of pattern items. 
+ /// + /// ✨ *Enabled with the `alloc` Cargo feature.* + /// + /// # Examples + /// + /// ``` + /// use icu_pattern::Pattern; + /// use icu_pattern::PatternItemCow; + /// use icu_pattern::SinglePlaceholder; + /// use icu_pattern::SinglePlaceholderKey; + /// use std::borrow::Cow; + /// + /// Pattern::::try_from_items( + /// [ + /// PatternItemCow::Placeholder(SinglePlaceholderKey::Singleton), + /// PatternItemCow::Literal(Cow::Borrowed(" days")), + /// ] + /// .into_iter(), + /// ) + /// .expect("valid pattern items"); + /// ``` + pub fn try_from_items<'a, I>(items: I) -> Result + where + B: 'a, + I: Iterator>, + { + let store = B::try_from_items(items.map(Ok))?; + #[cfg(debug_assertions)] + match B::validate_store(core::borrow::Borrow::borrow(&store)) { + Ok(()) => (), + Err(e) => { + debug_assert!(false, "{:?}", e); + } + }; + Ok(Self { + _backend: PhantomData, + store, + }) + } +} + +#[cfg(feature = "alloc")] +impl Pattern::Owned> +where + B: PatternBackend, + B::PlaceholderKey: FromStr, + B::Store: ToOwned, + ::Err: fmt::Debug, +{ + /// Creates a pattern by parsing a syntax string. + /// + /// To construct from a serialized pattern string, use [`Self::try_from_store()`]. 
+ /// + /// ✨ *Enabled with the `alloc` Cargo feature.* + /// + /// # Examples + /// + /// ``` + /// use icu_pattern::Pattern; + /// use icu_pattern::SinglePlaceholder; + /// + /// // Create a pattern from a valid string: + /// Pattern::::try_from_str("{0} days") + /// .expect("valid pattern"); + /// + /// // Error on an invalid pattern: + /// Pattern::::try_from_str("{0 days") + /// .expect_err("mismatched braces"); + /// ``` + pub fn try_from_str(pattern: &str) -> Result { + let parser = Parser::new( + pattern, + ParserOptions { + allow_raw_letters: true, + }, + ); + let store = B::try_from_items(parser)?; + #[cfg(debug_assertions)] + match B::validate_store(core::borrow::Borrow::borrow(&store)) { + Ok(()) => (), + Err(e) => { + debug_assert!(false, "{:?} for pattern {:?}", e, pattern); + } + }; + Ok(Self { + _backend: PhantomData, + store, + }) + } +} + +#[cfg(feature = "alloc")] +impl FromStr for Pattern::Owned> +where + B: PatternBackend, + B::PlaceholderKey: FromStr, + B::Store: ToOwned, + ::Err: fmt::Debug, +{ + type Err = Error; + fn from_str(pattern: &str) -> Result { + Self::try_from_str(pattern) + } +} + +impl Pattern +where + B: PatternBackend, + Store: AsRef + ?Sized, +{ + /// Returns an iterator over the [`PatternItem`]s in this pattern. + pub fn iter(&self) -> impl Iterator> + '_ { + B::iter_items(self.store.as_ref()) + } + + /// Returns a [`Writeable`] that interpolates items from the given replacement provider + /// into this pattern string. + pub fn interpolate<'a, P>(&'a self, value_provider: P) -> impl Writeable + fmt::Display + 'a + where + P: PlaceholderValueProvider + 'a, + { + WriteablePattern:: { + store: self.store.as_ref(), + value_provider, + } + } + + #[cfg(feature = "alloc")] + /// Interpolates the pattern directly to a string. + /// + /// ✨ *Enabled with the `alloc` Cargo feature.* + pub fn interpolate_to_string
<P>
(&self, value_provider: P) -> String + where + P: PlaceholderValueProvider, + { + self.interpolate(value_provider) + .write_to_string() + .into_owned() + } + + /// Interpolates items with [writeable::Part]s. + /// + /// Two parts are used: + /// + /// 1. `literal_part` for [`PatternItem::Literal`] + /// 2. `element_part` for [`PatternItem::Placeholder`] + /// + /// # Examples + /// + /// ``` + /// use icu_pattern::Pattern; + /// use icu_pattern::SinglePlaceholder; + /// use writeable::assert_writeable_parts_eq; + /// + /// let pattern = + /// Pattern::::try_from_str("Hello, {0}!").unwrap(); + /// + /// const LITERAL_PART: writeable::Part = writeable::Part { + /// category: "demo", + /// value: "literal", + /// }; + /// const ELEMENT_PART: writeable::Part = writeable::Part { + /// category: "demo", + /// value: "element", + /// }; + /// + /// assert_writeable_parts_eq!( + /// pattern.interpolate_with_parts(["Alice"], LITERAL_PART, ELEMENT_PART), + /// "Hello, Alice!", + /// [ + /// (0, 7, LITERAL_PART), + /// (7, 12, ELEMENT_PART), + /// (12, 13, LITERAL_PART), + /// ] + /// ); + /// ``` + pub fn interpolate_with_parts<'a, P>( + &'a self, + value_provider: P, + literal_part: writeable::Part, + placeholder_value_part: writeable::Part, + ) -> impl Writeable + fmt::Display + 'a + where + P: PlaceholderValueProvider + 'a, + { + WriteablePatternWithParts:: { + store: self.store.as_ref(), + value_provider, + literal_part, + element_part: placeholder_value_part, + } + } +} + +struct WriteablePattern<'a, B: PatternBackend, P> { + store: &'a B::Store, + value_provider: P, +} + +impl Writeable for WriteablePattern<'_, B, P> +where + B: PatternBackend, + P: PlaceholderValueProvider, +{ + fn write_to(&self, sink: &mut W) -> fmt::Result { + let it = B::iter_items(self.store); + #[cfg(debug_assertions)] + let (size_hint, mut actual_len) = (it.size_hint(), 0); + for item in it { + match item { + PatternItem::Literal(s) => { + sink.write_str(s)?; + } + PatternItem::Placeholder(key) 
=> { + let element_writeable = self.value_provider.value_for(key); + element_writeable.write_to(sink)?; + } + } + #[cfg(debug_assertions)] + { + actual_len += 1; + } + } + #[cfg(debug_assertions)] + { + debug_assert!(actual_len >= size_hint.0); + if let Some(max_len) = size_hint.1 { + debug_assert!(actual_len <= max_len); + } + } + Ok(()) + } +} + +impl fmt::Display for WriteablePattern<'_, B, P> +where + B: PatternBackend, + P: PlaceholderValueProvider, +{ + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.write_to(f) + } +} + +struct WriteablePatternWithParts<'a, B: PatternBackend, P> { + store: &'a B::Store, + value_provider: P, + literal_part: writeable::Part, + element_part: writeable::Part, +} + +impl Writeable for WriteablePatternWithParts<'_, B, P> +where + B: PatternBackend, + P: PlaceholderValueProvider, +{ + fn write_to_parts(&self, sink: &mut S) -> fmt::Result { + for item in B::iter_items(self.store) { + match item { + PatternItem::Literal(s) => { + sink.with_part(self.literal_part, |w| w.write_str(s))?; + } + PatternItem::Placeholder(key) => { + let element_writeable = self.value_provider.value_for(key); + sink.with_part(self.element_part, |w| element_writeable.write_to_parts(w))?; + } + } + } + Ok(()) + } +} + +impl fmt::Display for WriteablePatternWithParts<'_, B, P> +where + B: PatternBackend, + P: PlaceholderValueProvider, +{ + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.write_to(f) + } +} diff --git a/utils/pattern/src/interpolator/error.rs b/utils/pattern/src/interpolator/error.rs deleted file mode 100644 index 14b817b1092..00000000000 --- a/utils/pattern/src/interpolator/error.rs +++ /dev/null @@ -1,34 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
- -use displaydoc::Display; -use std::{fmt::Debug, str::FromStr}; - -/// An error returned when interpolating a pattern. -/// -/// # Type parameters -/// -/// - `K`: A key for the replacement provider. -#[derive(Display, Debug, PartialEq)] -pub enum InterpolatorError -where - K: Debug + FromStr + PartialEq, - K::Err: Debug + PartialEq, -{ - #[displaydoc("Invalid placeholder: {0:?}")] - InvalidPlaceholder(K::Err), - #[displaydoc("Missing placeholder: {0:?}")] - MissingPlaceholder(K), - #[displaydoc("Unclosed placeholder")] - UnclosedPlaceholder, - #[displaydoc("Unclosed quoted literal")] - UnclosedQuotedLiteral, -} - -impl std::error::Error for InterpolatorError -where - K: Debug + FromStr + PartialEq, - K::Err: Debug + PartialEq, -{ -} diff --git a/utils/pattern/src/interpolator/mod.rs b/utils/pattern/src/interpolator/mod.rs deleted file mode 100644 index 5c81fa0d4f6..00000000000 --- a/utils/pattern/src/interpolator/mod.rs +++ /dev/null @@ -1,406 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -mod error; -use crate::{replacement::ReplacementProvider, token::PatternToken}; -pub use error::InterpolatorError; -use std::{ - borrow::Cow, - fmt::{Debug, Display, Formatter}, - str::FromStr, -}; -use writeable::Writeable; - -/// The type returned by the [`Interpolator`] iterator. -/// This enum stores references to string literals parsed as -/// part of the pattern and elements returned by the [`ReplacementProvider`]. -/// -/// # Lifetimes -/// -/// - `i`: The life time of a kind that is being interpolated. -/// - `s`: The life time of a string slice literal. 
-#[derive(Debug, PartialEq)] -pub enum InterpolatedKind<'i, 's, E> { - Literal(&'i Cow<'s, str>), - Element(&'i E), -} - -impl<'i, 's, E> Writeable for InterpolatedKind<'i, 's, E> -where - E: Writeable, -{ - fn write_to(&self, sink: &mut W) -> std::result::Result<(), std::fmt::Error> - where - W: std::fmt::Write + ?Sized, - { - match self { - Self::Literal(lit) => sink.write_str(lit), - Self::Element(elem) => elem.write_to(sink), - } - } -} - -impl<'i, 's, E> Display for InterpolatedKind<'i, 's, E> -where - E: Display, -{ - fn fmt(&self, f: &mut Formatter<'_>) -> std::result::Result<(), std::fmt::Error> { - match self { - Self::Literal(lit) => f.write_str(lit), - Self::Element(elem) => elem.fmt(f), - } - } -} - -type Result = std::result::Result, InterpolatorError>; - -/// Placeholder pattern interpolator. -/// -/// The interpolator takes a pattern parser iterator and a replacement provider and -/// generates a new iterator over elements. -/// -/// The replacement may be any type, and the only bind is on the [`ReplacementProvider`] to -/// be able to return an iterator over elements to be interplotted into the pattern in -/// place of the placeholders based on the placeholder keys. 
-/// -/// # Examples -/// ``` -/// use icu_pattern::{ -/// InterpolatedKind, Interpolator, Parser, ParserOptions, Pattern, -/// }; -/// -/// #[derive(Debug, PartialEq)] -/// enum Element { -/// Value(usize), -/// } -/// -/// let pattern: Pattern<_> = Parser::new( -/// "{0} days ago", -/// ParserOptions { -/// allow_raw_letters: true, -/// }, -/// ) -/// .try_into() -/// .unwrap(); -/// -/// let replacements = vec![Element::Value(5)]; -/// -/// let mut interpolator = Interpolator::new(&pattern, &replacements); -/// -/// let mut result = vec![]; -/// -/// while let Some(element) = -/// interpolator.try_next().expect("Failed to advance iterator") -/// { -/// result.push(element); -/// } -/// -/// assert_eq!( -/// result, -/// &[ -/// InterpolatedKind::Element(&Element::Value(5)), -/// InterpolatedKind::Literal(&" days ago".into()), -/// ] -/// ); -/// ``` -/// -/// # Type parameters -/// -/// - `R`: A replacement provider type implementing [`ReplacementProvider`]. -/// - `E`: An element type returned by the [`ReplacementProvider`]. -/// -/// # Lifetimes -/// -/// - `i`: The life time of an input pattern slice. -/// - `p`: The life time of an input [`PatternToken`], which is the life time of the string slice. -/// -/// # Element & Replacement Provider -/// -/// In order to allow for wide range of inputs to be interpolated using the placeholder pattern, -/// the `Element` and [`ReplacementProvider`] types are generic. -/// This allows the consumer of the API to decide what elements the pattern should return and how -/// will they be identified based on any type of key that can be parsed out of a string slice. -/// -/// This design allows for the interpolator to remain agnostic and flexible and handles wide range -/// of ownership and life time models. 
-/// -/// To simplify the common use cases, the [`ReplacementProvider`] comes with implementations for -/// [`Vec`] (where the placehoder key is [`usize`]) and [`HashMap`] (where the placeholder key is -/// [`String`]) but the consumer is free to implement their own providers for any type they wish. -/// -/// # Design Decisions -/// -/// The interpolator is written in an intentionally generic way to enable use against wide range -/// of potential placeholder pattern models and use cases. -/// -/// ## Fallible Iterator -/// -/// Rust providers a strong support for iterators and iterator combinators, which -/// fits very well into the design of this parser/interpolator model. -/// -/// Unfortunately, Rust iterators at the moment are infallible, while parsers are inhereantely -/// fallible. As such, the decision has been made to design the API in line with what -/// we hope will become a trait signature of a fallible iterator in the future, rather -/// than implementing a reversed infallible iterator (where the [`Item`] would be -/// `Option>`). -/// -/// Since the interpolator chains on top of the [`Parser`] it inherits the same fallible -/// iterator API and behavior. -/// -/// [`Item`]: std::iter::Iterator::Item -/// [`HashMap`]: std::collections::HashMap -/// [`Parser`]: crate::parser::Parser -/// [`IntoIterVec`]: crate::pattern::IntoIterVec -#[derive(Debug)] -pub struct Interpolator<'i, 'p, R, E> -where - R: ReplacementProvider<'i, E>, -{ - tokens: &'i [PatternToken<'p, R::Key>], - token_idx: usize, - replacements: &'i R, - current_replacement: Option, -} - -impl<'i, 'p, R, E> Interpolator<'i, 'p, R, E> -where - R: ReplacementProvider<'i, E>, -{ - /// Creates a new `Interpolator`. 
- /// - /// # Examples - /// ``` - /// use icu_pattern::{Interpolator, Parser, ParserOptions, Pattern}; - /// - /// enum Element { - /// Literal(String), - /// Token, - /// } - /// - /// let pattern: Pattern = Parser::new( - /// "{0}, {1}", - /// ParserOptions { - /// allow_raw_letters: false, - /// }, - /// ) - /// .try_into() - /// .unwrap(); - /// let replacements = vec![vec![Element::Token]]; - /// let mut interpolator = - /// Interpolator::>, Element>::new(&pattern, &replacements); - /// ``` - pub fn new(tokens: &'i [PatternToken<'p, R::Key>], replacements: &'i R) -> Self { - Self { - tokens, - token_idx: 0, - replacements, - current_replacement: None, - } - } - - /// An iterator method that advances the iterator and returns the result of an attempt to - /// interpolate parser and replacement provider tokens. - /// - /// # Examples - /// ``` - /// use icu_pattern::{ - /// InterpolatedKind, Interpolator, Parser, ParserOptions, Pattern, - /// }; - /// - /// #[derive(Debug, PartialEq)] - /// enum Element { - /// TokenOne, - /// TokenTwo, - /// } - /// - /// let mut pattern: Pattern<_> = Parser::new( - /// "{0}, {1}", - /// ParserOptions { - /// allow_raw_letters: false, - /// }, - /// ) - /// .try_into() - /// .unwrap(); - /// - /// let replacements = vec![vec![Element::TokenOne], vec![Element::TokenTwo]]; - /// let mut interpolator = Interpolator::new(&pattern, &replacements); - /// - /// // A call to try_next() returns the next value… - /// assert_eq!( - /// Ok(Some(InterpolatedKind::Element(&Element::TokenOne))), - /// interpolator.try_next() - /// ); - /// assert_eq!( - /// Ok(Some(InterpolatedKind::Literal(&", ".into()))), - /// interpolator.try_next() - /// ); - /// assert_eq!( - /// Ok(Some(InterpolatedKind::Element(&Element::TokenTwo))), - /// interpolator.try_next() - /// ); - /// - /// // … and then `None` once it's over. 
- /// assert_eq!(Ok(None), interpolator.try_next()); - /// ``` - pub fn try_next(&mut self) -> Result, R::Key> - where - R::Key: Debug + FromStr + PartialEq + Clone, - ::Err: Debug + PartialEq, - { - loop { - if let Some(ref mut replacement) = &mut self.current_replacement { - if let Some(v) = replacement.next() { - return Ok(Some(InterpolatedKind::Element(v))); - } else { - self.current_replacement = None; - } - } - match self.tokens.get(self.token_idx) { - Some(PatternToken::Literal { content, .. }) => { - self.token_idx += 1; - return Ok(Some(InterpolatedKind::Literal(content))); - } - Some(PatternToken::Placeholder(p)) => { - self.token_idx += 1; - self.current_replacement = self.replacements.take_replacement(p); - if self.current_replacement.is_none() { - return Err(InterpolatorError::MissingPlaceholder(p.clone())); - } - } - None => { - return Ok(None); - } - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{Parser, ParserOptions, Pattern, PatternError}; - use std::convert::TryInto; - use std::{borrow::Cow, fmt::Display}; - - const SAMPLES: &[(&str, &[&[&str]], &str)] = &[ - ( - "'Foo' {0} 'and' {1}", - &[&["Hello"], &["World"]], - "Foo Hello and World", - ), - ( - "{0}, {1} 'and' {2}", - &[&["Start"], &["Middle"], &["End"]], - "Start, Middle and End", - ), - ("{0} 'at' {1}", &[&["Hello"], &["World"]], "Hello at World"), - ]; - - #[derive(Debug, PartialEq)] - pub enum Element<'s> { - Literal(Cow<'s, str>), - } - - impl Display for Element<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Literal(s) => f.write_str(s), - } - } - } - - impl<'s> From<&'s str> for Element<'s> { - fn from(input: &'s str) -> Self { - Self::Literal(Cow::Borrowed(input)) - } - } - - #[test] - fn simple_interpolate() { - for sample in SAMPLES.iter() { - let pattern: Pattern = Parser::new( - sample.0, - ParserOptions { - allow_raw_letters: false, - }, - ) - .try_into() - .unwrap(); - - let replacements: Vec> = 
sample - .1 - .iter() - .map(|r| r.iter().map(|&t| t.into()).collect()) - .collect(); - let interpolated_pattern = pattern - .interpolate::<'_, Element, _>(&replacements) - .unwrap(); - let result = interpolated_pattern.to_string(); - assert_eq!(result, sample.2); - } - } - - #[test] - fn simple_interpolate_hash() { - let named_samples = vec![( - "{start}, {middle} 'and' {end}", - vec![ - ("start", vec!["Start"]), - ("middle", vec!["Middle"]), - ("end", vec!["End"]), - ], - )]; - - for sample in &named_samples { - let pattern: Pattern = Parser::new( - sample.0, - ParserOptions { - allow_raw_letters: false, - }, - ) - .try_into() - .unwrap(); - - let replacements: std::collections::HashMap> = sample - .1 - .iter() - .map(|(k, v)| { - ( - (*k).to_owned(), - v.iter().map(|&t| Element::from(t)).collect(), - ) - }) - .collect(); - - let interpolated_pattern = pattern - .interpolate::<'_, Element, _>(&replacements) - .unwrap(); - let _ = interpolated_pattern.to_string(); - } - } - - #[test] - fn missing_placeholder() { - let samples: Vec<(&str, Vec)> = vec![("{0} days", vec![])]; - - for sample in &samples { - let pattern: Pattern = Parser::new( - sample.0, - ParserOptions { - allow_raw_letters: true, - }, - ) - .try_into() - .expect("Failed to parse a sample"); - - let interpolated_pattern = pattern.interpolate::<'_, Element, _>(&sample.1); - assert_eq!( - interpolated_pattern, - Err(PatternError::Interpolator( - InterpolatorError::MissingPlaceholder(0) - )), - ); - } - } -} diff --git a/utils/pattern/src/lib.rs b/utils/pattern/src/lib.rs index 76347f528d6..53617838484 100644 --- a/utils/pattern/src/lib.rs +++ b/utils/pattern/src/lib.rs @@ -4,105 +4,35 @@ //! `icu_pattern` is a utility crate of the [`ICU4X`] project. //! -//! It includes a [`Pattern`] struct which wraps a paid of [`Parser`] and [`Interpolator`] allowing for parsing and interpolation of ICU placeholder patterns, like "{0} days" or -//! "{0}, {1}" with custom elements and string literals. +//! 
It includes a [`Pattern`] type which supports patterns with various storage backends. //! -//! # Placeholders & Elements -//! -//! The [`Parser`] is generic over any `Placeholder` which implements [`FromStr`] -//! allowing the consumer to parse placeholder patterns such as "{0}, {1}", -//! "{date}, {time}" or any other. -//! -//! The [`Interpolator`] can interpolate the [`Pattern`] against any -//! iterator over `Element`. +//! The types are tightly coupled with the [`writeable`] crate. //! //! # Examples //! -//! In the following example we're going to use a custom `Token` type, -//! and an `Element` type which will be either a `Token` or a string slice. -//! -//! For the purpose of the example, a higher level -//! [`interpolate_to_string`](Pattern::interpolate_to_string) method -//! is being used. +//! Parsing and interpolating with a single-placeholder pattern: //! //! ``` -//! use icu_pattern::Pattern; -//! use std::{borrow::Cow, convert::TryInto, fmt::Display}; -//! -//! #[derive(Debug, PartialEq)] -//! enum ExampleToken { -//! Year, -//! Month, -//! Day, -//! Hour, -//! Minute, -//! } -//! -//! impl Display for ExampleToken { -//! fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { -//! write!(f, "[{:?}]", self) -//! } -//! } -//! -//! #[derive(Debug, PartialEq)] -//! enum ExampleElement<'s> { -//! Token(ExampleToken), -//! Literal(Cow<'s, str>), -//! } -//! -//! impl Display for ExampleElement<'_> { -//! fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { -//! match self { -//! Self::Token(token) => token.fmt(f), -//! Self::Literal(lit) => lit.fmt(f), -//! } -//! } -//! } +//! use icu_pattern::SinglePlaceholderPattern; +//! use writeable::assert_writeable_eq; //! -//! let pattern: Pattern = -//! "{0}, {1}".try_into().expect("Failed to parse a pattern."); +//! // Parse a pattern string: +//! let pattern = "Hello, {0}!" +//! .parse::>() +//! .unwrap(); //! -//! let replacements = vec![ -//! vec![ -//! 
ExampleElement::Token(ExampleToken::Year), -//! ExampleElement::Literal("-".into()), -//! ExampleElement::Token(ExampleToken::Month), -//! ExampleElement::Literal("-".into()), -//! ExampleElement::Token(ExampleToken::Day), -//! ], -//! vec![ -//! ExampleElement::Token(ExampleToken::Hour), -//! ExampleElement::Literal(":".into()), -//! ExampleElement::Token(ExampleToken::Minute), -//! ], -//! ]; +//! // Interpolate into the pattern string: +//! assert_writeable_eq!(pattern.interpolate(["World"]), "Hello, World!"); //! -//! assert_eq!( -//! pattern -//! .interpolate_to_string::(&replacements) -//! .expect("Failed to interpolate a pattern."), -//! "[Year]-[Month]-[Day], [Hour]:[Minute]" -//! ); +//! // Introspect the serialized form of the pattern string: +//! assert_eq!(pattern.take_store(), "\x08Hello, !"); //! ``` //! -//! # Combinators -//! -//! In the example above, the replacements will be parsed at compile time and stored on a [`Vec`], -//! which is a collection type that has an implementation for [`ReplacementProvider`] -//! trait. -//! -//! In real use, the consumer may want to use different models of replacement provider, -//! and different element schemas. -//! Because the replacement is an iterator itself, it allows for other, more specialized parsers, -//! to be used to lazily parse particular patterns that are meant to replace the placeholders. -//! This allows for lazy parsing of those specialized patterns to be triggered -//! only if the placeholder pattern encounters a placeholder key that requires given -//! pattern to be used. -//! //! [`ICU4X`]: ../icu/index.html //! 
[`FromStr`]: std::str::FromStr // https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations +#![cfg_attr(not(any(test, feature = "std")), no_std)] #![cfg_attr( not(test), deny( @@ -110,20 +40,59 @@ clippy::unwrap_used, clippy::expect_used, clippy::panic, - // TODO(#1668): enable clippy::exhaustive_structs, - // TODO(#1668): enable clippy::exhaustive_enums, + clippy::exhaustive_structs, + clippy::exhaustive_enums, missing_debug_implementations, ) )] -mod interpolator; +#[cfg(feature = "alloc")] +extern crate alloc; + +#[cfg(feature = "alloc")] +mod builder; +mod common; +mod error; +mod frontend; +#[cfg(feature = "alloc")] mod parser; -mod pattern; -mod replacement; -mod token; +mod single; + +pub use common::PatternBackend; +pub use common::PatternItem; +#[cfg(feature = "alloc")] +pub use common::PatternItemCow; +pub use common::PlaceholderValueProvider; +pub use error::PatternError; +pub use frontend::Pattern; +#[cfg(feature = "alloc")] +pub use parser::ParsedPatternItem; +#[cfg(feature = "alloc")] +pub use parser::Parser; +#[cfg(feature = "alloc")] +pub use parser::ParserError; +#[cfg(feature = "alloc")] +pub use parser::ParserOptions; +pub use single::SinglePlaceholder; +pub use single::SinglePlaceholderKey; +#[doc(no_inline)] +pub use PatternError as Error; + +mod private { + pub trait Sealed {} +} -pub use interpolator::{InterpolatedKind, Interpolator, InterpolatorError}; -pub use parser::{Parser, ParserError, ParserOptions}; -pub use pattern::{InterpolatedPattern, Pattern, PatternError}; -pub use replacement::ReplacementProvider; -pub use token::PatternToken; +/// # Examples +/// +/// ``` +/// use icu_pattern::SinglePlaceholderPattern; +/// use writeable::assert_writeable_eq; +/// +/// // Create a pattern from the string syntax: +/// let pattern = +/// SinglePlaceholderPattern::try_from_str("Hello, {0}!").unwrap(); +/// +/// // Interpolate some values into the pattern: +/// 
assert_writeable_eq!(pattern.interpolate(["Alice"]), "Hello, Alice!"); +/// ``` +pub type SinglePlaceholderPattern = Pattern; diff --git a/utils/pattern/src/parser/error.rs b/utils/pattern/src/parser/error.rs index 347306c844e..ed1baea9a15 100644 --- a/utils/pattern/src/parser/error.rs +++ b/utils/pattern/src/parser/error.rs @@ -2,21 +2,18 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +use core::fmt::Debug; use displaydoc::Display; -use std::fmt::Debug; /// An error returned when parsing a pattern. /// +/// ✨ *Enabled with the `alloc` Cargo feature.* +/// /// # Examples /// ``` /// use icu_pattern::{Parser, ParserError, ParserOptions}; /// -/// let mut parser = Parser::::new( -/// "{0", -/// ParserOptions { -/// allow_raw_letters: false, -/// }, -/// ); +/// let mut parser = Parser::::new("{0", ParserOptions::default()); /// assert_eq!(Err(ParserError::UnclosedPlaceholder), parser.try_next()); /// ``` /// @@ -24,8 +21,9 @@ use std::fmt::Debug; /// /// - `E`: An error of the replacement type which implements [`FromStr`]. /// -/// [`FromStr`]: std::str::FromStr +/// [`FromStr`]: core::str::FromStr #[derive(Display, Debug, PartialEq)] +#[non_exhaustive] pub enum ParserError where E: Debug, @@ -47,4 +45,5 @@ where UnclosedQuotedLiteral, } +#[cfg(feature = "std")] impl std::error::Error for ParserError {} diff --git a/utils/pattern/src/parser/mod.rs b/utils/pattern/src/parser/mod.rs index 75e8dc2689c..50af451791d 100644 --- a/utils/pattern/src/parser/mod.rs +++ b/utils/pattern/src/parser/mod.rs @@ -3,10 +3,12 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
pub mod error; +pub mod token; -use crate::token::PatternToken; +use alloc::{borrow::Cow, vec, vec::Vec}; +use core::{fmt::Debug, marker::PhantomData, str::FromStr}; pub use error::ParserError; -use std::{borrow::Cow, fmt::Debug, marker::PhantomData, str::FromStr}; +pub use token::ParsedPatternItem; #[derive(PartialEq, Debug)] enum ParserState { @@ -28,7 +30,7 @@ macro_rules! handle_literal { if !range.is_empty() { #[allow(clippy::indexing_slicing)] // TODO(#1668) Clippy exceptions need docs or fixing. - return Ok(Some(PatternToken::Literal { + return Ok(Some(ParsedPatternItem::Literal { content: Cow::Borrowed(&$self.input[range]), quoted: $quoted, })); @@ -39,7 +41,10 @@ macro_rules! handle_literal { } /// Options passed to the constructor of [`Parser`]. +/// +/// ✨ *Enabled with the `alloc` Cargo feature.* #[derive(Debug)] +#[non_exhaustive] pub struct ParserOptions { /// Controls whether ASCII letters can appear in the raw /// pattern. @@ -49,31 +54,41 @@ pub struct ParserOptions { /// /// If set to `false`, ASCII letters can only appear in quoted literals, /// like "{0} 'days'". + /// + /// Default is `true`. pub allow_raw_letters: bool, } +impl Default for ParserOptions { + fn default() -> Self { + Self { + allow_raw_letters: true, + } + } +} + /// Placeholder pattern parser. /// /// The parser allows for handling flexible range of generic patterns /// with placeholders. -/// A placeholder may be anything that can be parsed from an `&str` and -/// must be enclosed in `{` and `}` characters in the input pattern string. +/// +/// The [`Parser`] is generic over any placeholder which implements [`FromStr`] +/// allowing the consumer to parse placeholder patterns such as "{0}, {1}", +/// "{date}, {time}" or any other. A placeholder must be enclosed in `{` and `}` +/// characters in the input pattern string. /// /// At the moment the parser is written as a custom fallible iterator. 
/// +/// ✨ *Enabled with the `alloc` Cargo feature.* +/// /// # Examples /// /// ``` -/// use icu_pattern::{Parser, ParserOptions, PatternToken}; +/// use icu_pattern::{ParsedPatternItem, Parser, ParserOptions}; /// /// let input = "{0}, {1}"; /// -/// let mut parser = Parser::new( -/// input, -/// ParserOptions { -/// allow_raw_letters: false, -/// }, -/// ); +/// let mut parser = Parser::new(input, ParserOptions::default()); /// /// let mut result = vec![]; /// @@ -86,12 +101,12 @@ pub struct ParserOptions { /// assert_eq!( /// result, /// &[ -/// PatternToken::Placeholder(0), -/// PatternToken::Literal { +/// ParsedPatternItem::Placeholder(0), +/// ParsedPatternItem::Literal { /// content: ", ".into(), /// quoted: false /// }, -/// PatternToken::Placeholder(1), +/// ParsedPatternItem::Placeholder(1), /// ] /// ); /// ``` @@ -102,16 +117,11 @@ pub struct ParserOptions { /// /// ## Examples /// ``` -/// use icu_pattern::{Parser, ParserOptions, PatternToken}; +/// use icu_pattern::{ParsedPatternItem, Parser, ParserOptions}; /// /// let input = "{start}, {end}"; /// -/// let mut parser = Parser::new( -/// input, -/// ParserOptions { -/// allow_raw_letters: false, -/// }, -/// ); +/// let mut parser = Parser::new(input, ParserOptions::default()); /// /// let mut result = vec![]; /// @@ -124,19 +134,19 @@ pub struct ParserOptions { /// assert_eq!( /// result, /// &[ -/// PatternToken::Placeholder("start".to_owned()), -/// PatternToken::Literal { +/// ParsedPatternItem::Placeholder("start".to_owned()), +/// ParsedPatternItem::Literal { /// content: ", ".into(), /// quoted: false /// }, -/// PatternToken::Placeholder("end".to_owned()), +/// ParsedPatternItem::Placeholder("end".to_owned()), /// ] /// ); /// ``` /// /// # Type parameters /// -/// - `P`: The type of the placeholder used as a key for the [`ReplacementProvider`]. +/// - `P`: The type of the placeholder used as a key for the [`PlaceholderValueProvider`]. 
/// /// # Lifetimes /// @@ -170,16 +180,11 @@ pub struct ParserOptions { /// /// ### Examples /// ``` -/// use icu_pattern::{Parser, ParserOptions, PatternToken}; +/// use icu_pattern::{ParsedPatternItem, Parser, ParserOptions}; /// /// let input = "{0} 'and' {1}"; /// -/// let mut parser = Parser::new( -/// input, -/// ParserOptions { -/// allow_raw_letters: false, -/// }, -/// ); +/// let mut parser = Parser::new(input, ParserOptions::default()); /// /// let mut result = vec![]; /// @@ -192,20 +197,20 @@ pub struct ParserOptions { /// assert_eq!( /// result, /// &[ -/// PatternToken::Placeholder(0), -/// PatternToken::Literal { +/// ParsedPatternItem::Placeholder(0), +/// ParsedPatternItem::Literal { /// content: " ".into(), /// quoted: false /// }, -/// PatternToken::Literal { +/// ParsedPatternItem::Literal { /// content: "and".into(), /// quoted: true /// }, -/// PatternToken::Literal { +/// ParsedPatternItem::Literal { /// content: " ".into(), /// quoted: false /// }, -/// PatternToken::Placeholder(1), +/// ParsedPatternItem::Placeholder(1), /// ] /// ); /// ``` @@ -242,9 +247,9 @@ pub struct ParserOptions { /// /// [`TR35 2.6.1]: https://unicode.org/reports/tr35/tr35-dates.html#dateTimeFormat /// [`RFC 2924`]: https://github.com/rust-lang/rfcs/pull/2924 -/// [`Item`]: std::iter::Iterator::Item -/// [`TryFrom`]: std::convert::TryFrom -/// [`ReplacementProvider`]: crate::ReplacementProvider +/// [`Item`]: core::iter::Iterator::Item +/// [`TryFrom`]: core::convert::TryFrom +/// [`PlaceholderValueProvider`]: crate::PlaceholderValueProvider #[derive(Debug)] pub struct Parser<'p, P> { input: &'p str, @@ -268,12 +273,7 @@ impl<'p, P> Parser<'p, P> { /// # Examples /// ``` /// use icu_pattern::{Parser, ParserOptions}; - /// let mut parser = Parser::::new( - /// "{0}, {1}", - /// ParserOptions { - /// allow_raw_letters: false, - /// }, - /// ); + /// let mut parser = Parser::::new("{0}, {1}", ParserOptions::default()); /// ``` pub fn new(input: &'p str, options: 
ParserOptions) -> Self { Self { @@ -295,32 +295,33 @@ impl<'p, P> Parser<'p, P> { /// /// # Examples /// ``` - /// use icu_pattern::{Parser, ParserOptions, PatternToken}; + /// use icu_pattern::{ParsedPatternItem, Parser, ParserOptions}; /// - /// let mut parser = Parser::::new( - /// "{0}, {1}", - /// ParserOptions { - /// allow_raw_letters: false, - /// }, - /// ); + /// let mut parser = Parser::::new("{0}, {1}", ParserOptions::default()); /// /// // A call to try_next() returns the next value… - /// assert_eq!(Ok(Some(PatternToken::Placeholder(0))), parser.try_next()); /// assert_eq!( - /// Ok(Some(PatternToken::Literal { + /// Ok(Some(ParsedPatternItem::Placeholder(0))), + /// parser.try_next() + /// ); + /// assert_eq!( + /// Ok(Some(ParsedPatternItem::Literal { /// content: ", ".into(), /// quoted: false /// })), /// parser.try_next() /// ); - /// assert_eq!(Ok(Some(PatternToken::Placeholder(1))), parser.try_next()); + /// assert_eq!( + /// Ok(Some(ParsedPatternItem::Placeholder(1))), + /// parser.try_next() + /// ); /// /// // … and then `None` once it's over. /// assert_eq!(Ok(None), parser.try_next()); /// ``` pub fn try_next( &mut self, - ) -> Result>, ParserError<

::Err>> + ) -> Result>, ParserError<

::Err>> where P: FromStr, P::Err: Debug, @@ -333,7 +334,7 @@ impl<'p, P> Parser<'p, P> { // TODO(#1668) Clippy exceptions need docs or fixing. return self.input[range] .parse() - .map(|ret| Some(PatternToken::Placeholder(ret))) + .map(|ret| Some(ParsedPatternItem::Placeholder(ret))) .map_err(ParserError::InvalidPlaceholder); } ParserState::QuotedLiteral if *b == b'\'' => { @@ -377,7 +378,7 @@ impl<'p, P> Parser<'p, P> { self.start_idx = self.len; #[allow(clippy::indexing_slicing)] // TODO(#1668) Clippy exceptions need docs or fixing. - Ok(Some(PatternToken::Literal { + Ok(Some(ParsedPatternItem::Literal { content: Cow::Borrowed(&self.input[range]), quoted: false, })) @@ -388,79 +389,96 @@ impl<'p, P> Parser<'p, P> { } } - fn advance_state(&mut self, idx: usize, next_state: ParserState) -> std::ops::Range { + fn advance_state(&mut self, idx: usize, next_state: ParserState) -> core::ops::Range { let range = self.start_idx..idx; self.idx = idx + 1; self.start_idx = self.idx; self.state = next_state; range } + + /// Mutates this parser and collects all [`ParsedPatternItem`]s into a vector. + pub fn try_collect_into_vec( + mut self, + ) -> Result>, ParserError<

::Err>> + where + P: FromStr, + P::Err: Debug, + { + let mut result = vec![]; + while let Some(token) = self.try_next()? { + result.push(token); + } + Ok(result) + } } #[cfg(test)] mod tests { use super::*; - use crate::pattern::Pattern; - use std::{convert::TryInto, ops::Deref}; + use core::ops::Deref; #[test] fn pattern_parse_placeholders() { let samples = vec![ - ("{0}", vec![PatternToken::Placeholder(0)]), + ("{0}", vec![ParsedPatternItem::Placeholder(0)]), ( "{0}{1}", - vec![PatternToken::Placeholder(0), PatternToken::Placeholder(1)], + vec![ + ParsedPatternItem::Placeholder(0), + ParsedPatternItem::Placeholder(1), + ], ), ( "{0} 'at' {1}", vec![ - PatternToken::Placeholder(0), - PatternToken::Literal { + ParsedPatternItem::Placeholder(0), + ParsedPatternItem::Literal { content: " ".into(), quoted: false, }, - PatternToken::Literal { + ParsedPatternItem::Literal { content: "at".into(), quoted: true, }, - PatternToken::Literal { + ParsedPatternItem::Literal { content: " ".into(), quoted: false, }, - PatternToken::Placeholder(1), + ParsedPatternItem::Placeholder(1), ], ), ( "{0}'at'{1}", vec![ - PatternToken::Placeholder(0), - PatternToken::Literal { + ParsedPatternItem::Placeholder(0), + ParsedPatternItem::Literal { content: "at".into(), quoted: true, }, - PatternToken::Placeholder(1), + ParsedPatternItem::Placeholder(1), ], ), ( "'{0}' 'at' '{1}'", vec![ - PatternToken::Literal { + ParsedPatternItem::Literal { content: "{0}".into(), quoted: true, }, - PatternToken::Literal { + ParsedPatternItem::Literal { content: " ".into(), quoted: false, }, - PatternToken::Literal { + ParsedPatternItem::Literal { content: "at".into(), quoted: true, }, - PatternToken::Literal { + ParsedPatternItem::Literal { content: " ".into(), quoted: false, }, - PatternToken::Literal { + ParsedPatternItem::Literal { content: "{1}".into(), quoted: true, }, @@ -469,33 +487,33 @@ mod tests { ( "'PRE' {0} 'and' {1} 'POST'", vec![ - PatternToken::Literal { + ParsedPatternItem::Literal { 
content: "PRE".into(), quoted: true, }, - PatternToken::Literal { + ParsedPatternItem::Literal { content: " ".into(), quoted: false, }, - PatternToken::Placeholder(0), - PatternToken::Literal { + ParsedPatternItem::Placeholder(0), + ParsedPatternItem::Literal { content: " ".into(), quoted: false, }, - PatternToken::Literal { + ParsedPatternItem::Literal { content: "and".into(), quoted: true, }, - PatternToken::Literal { + ParsedPatternItem::Literal { content: " ".into(), quoted: false, }, - PatternToken::Placeholder(1), - PatternToken::Literal { + ParsedPatternItem::Placeholder(1), + ParsedPatternItem::Literal { content: " ".into(), quoted: false, }, - PatternToken::Literal { + ParsedPatternItem::Literal { content: "POST".into(), quoted: true, }, @@ -504,28 +522,28 @@ mod tests { ( "{0} o''clock and 'o''clock'", vec![ - PatternToken::Placeholder(0), - PatternToken::Literal { + ParsedPatternItem::Placeholder(0), + ParsedPatternItem::Literal { content: " o".into(), quoted: false, }, - PatternToken::Literal { + ParsedPatternItem::Literal { content: "'".into(), quoted: false, }, - PatternToken::Literal { + ParsedPatternItem::Literal { content: "clock and ".into(), quoted: false, }, - PatternToken::Literal { + ParsedPatternItem::Literal { content: "o".into(), quoted: true, }, - PatternToken::Literal { + ParsedPatternItem::Literal { content: "'".into(), quoted: true, }, - PatternToken::Literal { + ParsedPatternItem::Literal { content: "clock".into(), quoted: true, }, @@ -540,11 +558,13 @@ mod tests { allow_raw_letters: true, }, ); - let result: Pattern<_> = parser.try_into().expect("Failed to parse a pattern"); + let result = parser + .try_collect_into_vec() + .expect("Failed to parse a pattern"); assert_eq!(result.deref(), expected,); } - let broken: Vec<(_, Option>)> = vec![ + let broken: Vec<(_, Option>)> = vec![ ("{", Some(ParserError::UnclosedPlaceholder)), ("{0", Some(ParserError::UnclosedPlaceholder)), ("{01", Some(ParserError::UnclosedPlaceholder)), @@ -554,7 
+574,7 @@ mod tests { // ``` // ParserError::InvalidPlaceholder( // ParseIntError { - // kind: std::num::IntErrorKind::InvalidDigit + // kind: core::num::IntErrorKind::InvalidDigit // } // ), // ``` @@ -575,7 +595,7 @@ mod tests { allow_raw_letters: false, }, ); - let result: Result, _> = parser.try_into(); + let result = parser.try_collect_into_vec(); if let Some(error) = error { assert_eq!(result.expect_err("Should have failed."), error,); } else { diff --git a/utils/pattern/src/token.rs b/utils/pattern/src/parser/token.rs similarity index 63% rename from utils/pattern/src/token.rs rename to utils/pattern/src/parser/token.rs index ddf31956eb2..13576cfd858 100644 --- a/utils/pattern/src/token.rs +++ b/utils/pattern/src/parser/token.rs @@ -2,23 +2,20 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use std::borrow::Cow; +use alloc::borrow::Cow; -/// A token returned by the [`Parser`]. +/// A [`PatternItem`] with additional detail returned by the [`Parser`]. +/// +/// ✨ *Enabled with the `alloc` Cargo feature.* /// /// # Examples /// /// ``` -/// use icu_pattern::{Parser, ParserOptions, PatternToken}; +/// use icu_pattern::{ParsedPatternItem, Parser, ParserOptions}; /// /// let input = "{0}, {1}"; /// -/// let mut parser = Parser::new( -/// input, -/// ParserOptions { -/// allow_raw_letters: false, -/// }, -/// ); +/// let mut parser = Parser::new(input, ParserOptions::default()); /// /// let mut result = vec![]; /// @@ -31,12 +28,12 @@ use std::borrow::Cow; /// assert_eq!( /// result, /// &[ -/// PatternToken::Placeholder(0), -/// PatternToken::Literal { +/// ParsedPatternItem::Placeholder(0), +/// ParsedPatternItem::Literal { /// content: ", ".into(), /// quoted: false /// }, -/// PatternToken::Placeholder(1), +/// ParsedPatternItem::Placeholder(1), /// ] /// ); /// ``` @@ -50,14 +47,16 @@ use std::borrow::Cow; /// - `s`: The life time of an input string slice being parsed. 
/// /// [`Parser`]: crate::Parser -/// [`FromStr`]: std::str::FromStr +/// [`PatternItem`]: crate::PatternItem +/// [`FromStr`]: core::str::FromStr #[derive(PartialEq, Debug, Clone)] -pub enum PatternToken<'s, P> { +#[non_exhaustive] +pub enum ParsedPatternItem<'s, P> { Placeholder(P), Literal { content: Cow<'s, str>, quoted: bool }, } -impl<'s, P> From<(&'s str, bool)> for PatternToken<'s, P> { +impl<'s, P> From<(&'s str, bool)> for ParsedPatternItem<'s, P> { fn from(input: (&'s str, bool)) -> Self { Self::Literal { content: Cow::Borrowed(input.0), diff --git a/utils/pattern/src/pattern/error.rs b/utils/pattern/src/pattern/error.rs deleted file mode 100644 index 68dd568cf8b..00000000000 --- a/utils/pattern/src/pattern/error.rs +++ /dev/null @@ -1,51 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -use crate::interpolator::InterpolatorError; -use displaydoc::Display; -use std::{fmt::Debug, str::FromStr}; - -/// An error returned from a pattern. -/// -/// # Type parameters -/// -/// - `K`: A key for the replacement provider. 
-#[derive(Display, Debug, PartialEq)] -pub enum PatternError -where - K: Debug + FromStr + PartialEq, - K::Err: Debug + PartialEq, -{ - #[displaydoc("Interpolator error: {0:?}")] - Interpolator(InterpolatorError), - #[displaydoc("Format error: {0:?}")] - Format(std::fmt::Error), -} - -impl std::error::Error for PatternError -where - K: Debug + FromStr + PartialEq, - K::Err: Debug + PartialEq, -{ -} - -impl From> for PatternError -where - K: Debug + FromStr + PartialEq, - K::Err: Debug + PartialEq, -{ - fn from(err: InterpolatorError) -> Self { - Self::Interpolator(err) - } -} - -impl From for PatternError -where - K: Debug + FromStr + PartialEq, - K::Err: Debug + PartialEq, -{ - fn from(err: std::fmt::Error) -> Self { - Self::Format(err) - } -} diff --git a/utils/pattern/src/pattern/mod.rs b/utils/pattern/src/pattern/mod.rs deleted file mode 100644 index 3c1c5743cf9..00000000000 --- a/utils/pattern/src/pattern/mod.rs +++ /dev/null @@ -1,322 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -mod error; - -use crate::{ - interpolator::{InterpolatedKind, Interpolator}, - parser::{Parser, ParserError, ParserOptions}, - replacement::ReplacementProvider, - token::PatternToken, -}; -pub use error::PatternError; -use std::{ - convert::{TryFrom, TryInto}, - fmt::{Debug, Display, Write}, - ops::Deref, - str::FromStr, -}; -use writeable::Writeable; - -/// `Pattern` stores the result of parsing operation as a vector -/// of [`PatternToken`] elements. -/// -/// # Type parameters -/// -/// - `P`: The type of the placeholder used as a key for the [`ReplacementProvider`]. -/// -/// # Lifetimes -/// -/// - `p`: The life time of an input string slice to be parsed. 
-/// -/// [`ReplacementProvider`]: crate::ReplacementProvider -#[derive(Debug)] -pub struct Pattern<'s, P>(pub(crate) Vec>); - -impl<'s, P> Pattern<'s, P> { - /// Interpolates the `Pattern` with provided replacements and returns - /// a [`InterpolatedPattern`] structure. - /// - /// # Example - /// - /// ``` - /// use icu_pattern::{InterpolatedKind, Pattern}; - /// use std::ops::Deref; - /// - /// #[derive(Debug, PartialEq)] - /// struct Element(usize); - /// - /// let pattern = Pattern::try_from("${0}").expect("Failed to parse a pattern"); - /// - /// let replacements = vec![Element(5)]; - /// - /// let interpolated_pattern = pattern - /// .interpolate(&replacements) - /// .expect("Failed to interpolate"); - /// - /// assert_eq!( - /// interpolated_pattern.deref(), - /// &[ - /// InterpolatedKind::Literal(&"$".into()), - /// InterpolatedKind::Element(&Element(5)), - /// ], - /// ); - /// ``` - /// - /// For allocation-free interpolation, see `interpolate_to_string` or - /// `interpolate_to_write`. - /// - /// For lower level interpolation iterator see [`Interpolator`]. - pub fn interpolate<'i, E, R>( - &'i self, - replacements: &'i R, - ) -> Result, PatternError> - where - R: ReplacementProvider<'i, E, Key = P>, - P: Debug + FromStr + PartialEq + Clone, -

::Err: Debug + PartialEq, - { - let mut interpolator = Interpolator::new(&self.0, replacements); - - let mut result = vec![]; - while let Some(ik) = interpolator.try_next()? { - result.push(ik); - } - Ok(InterpolatedPattern(result)) - } - - /// Interpolates the `Pattern` with provided replacements and a new - /// [`String`]. - /// - /// # Example - /// - /// ``` - /// use icu_pattern::Pattern; - /// use std::fmt::Display; - /// - /// #[derive(Debug, PartialEq)] - /// struct Element(usize); - /// - /// impl Display for Element { - /// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - /// self.0.fmt(f) - /// } - /// } - /// - /// let pattern = Pattern::try_from("${0}").expect("Failed to parse a pattern"); - /// - /// let replacements = vec![Element(5)]; - /// - /// let interpolated_pattern = pattern - /// .interpolate_to_string(&replacements) - /// .expect("Failed to interpolate"); - /// - /// assert_eq!(interpolated_pattern, "$5",); - /// ``` - /// - /// For buffer write interpolation, see `interpolate_to_write`. - /// - /// For lower level interpolation iterator see [`Interpolator`]. - pub fn interpolate_to_string<'i, E, R>( - &'i self, - replacements: &'i R, - ) -> Result> - where - R: ReplacementProvider<'i, E, Key = P>, - P: Debug + FromStr + PartialEq + Clone, -

::Err: Debug + PartialEq, - E: 'i + Display, - { - let mut result = String::new(); - self.interpolate_to_write(replacements, &mut result)?; - Ok(result) - } - - /// Interpolates the `Pattern` writing the result into a buffer. - /// - /// # Example - /// - /// ``` - /// use icu_pattern::Pattern; - /// use std::fmt::Display; - /// - /// #[derive(Debug, PartialEq)] - /// struct Element(usize); - /// - /// impl Display for Element { - /// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - /// self.0.fmt(f) - /// } - /// } - /// - /// let pattern = Pattern::try_from("${0}").expect("Failed to parse a pattern"); - /// - /// let replacements = vec![Element(5)]; - /// - /// let mut result = String::new(); - /// pattern - /// .interpolate_to_write(&replacements, &mut result) - /// .expect("Failed to interpolate"); - /// - /// assert_eq!(result, "$5",); - /// ``` - pub fn interpolate_to_write<'i, E, R, W>( - &'i self, - replacements: &'i R, - sink: &mut W, - ) -> Result<(), PatternError> - where - R: ReplacementProvider<'i, E, Key = P>, - P: Debug + FromStr + PartialEq + Clone, -

::Err: Debug + PartialEq, - E: 'i + Display, - W: Write, - { - let mut interpolator = Interpolator::new(&self.0, replacements); - while let Some(ik) = interpolator.try_next()? { - write!(sink, "{ik}")?; - } - Ok(()) - } - - /// Interpolates the `Pattern` writing the result into a [`Writeable`] buffer. - /// - /// # Example - /// - /// ``` - /// use icu_pattern::Pattern; - /// - /// let pattern = Pattern::try_from("${0}").expect("Failed to parse a pattern"); - /// - /// let replacements = vec![5]; - /// - /// let mut result = String::new(); - /// pattern - /// .interpolate_to_writeable(&replacements, &mut result) - /// .expect("Failed to interpolate"); - /// - /// assert_eq!(result, "$5",); - /// ``` - pub fn interpolate_to_writeable<'i, E, R, W>( - &'i self, - replacements: &'i R, - sink: &mut W, - ) -> Result<(), PatternError> - where - R: ReplacementProvider<'i, E, Key = P>, - P: Debug + FromStr + PartialEq + Clone, -

::Err: Debug + PartialEq, - E: 'i + Writeable, - W: Write, - { - let mut interpolator = Interpolator::new(&self.0, replacements); - while let Some(ik) = interpolator.try_next()? { - ik.write_to(sink)?; - } - Ok(()) - } -} - -impl<'s, P> TryFrom<&'s str> for Pattern<'s, P> -where - P: FromStr, -

::Err: Debug, -{ - type Error = ParserError<

::Err>; - - fn try_from(input: &'s str) -> Result { - Parser::new( - input, - ParserOptions { - allow_raw_letters: false, - }, - ) - .try_into() - } -} - -impl<'p, P> TryFrom> for Pattern<'p, P> -where - P: FromStr, -

::Err: Debug, -{ - type Error = ParserError<

::Err>; - - fn try_from(mut parser: Parser<'p, P>) -> Result { - let mut result = vec![]; - while let Some(token) = parser.try_next()? { - result.push(token); - } - Ok(Self(result)) - } -} - -impl<'p, P> From>> for Pattern<'p, P> -where - P: FromStr, -

::Err: Debug, -{ - fn from(tokens: Vec>) -> Self { - Self(tokens) - } -} - -impl<'p, P> Deref for Pattern<'p, P> { - type Target = [PatternToken<'p, P>]; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -/// `InterpolatedPattern` stores the result of parsing operation as a vector -/// of [`InterpolatedKind`] elements. -/// -/// # Type parameters -/// -/// - `E`: An element type returned by the `ReplacementProvider`. -/// -/// # Lifetimes -/// -/// - `i`: The life time of `ReplacementProvider`. -/// - `s`: The life time of literals stored in the `E` -/// -/// [`ReplacementProvider`]: crate::ReplacementProvider -#[derive(Debug, PartialEq)] -pub struct InterpolatedPattern<'i, 's, E>(Vec>); - -impl<'i, 's, E> Writeable for InterpolatedPattern<'i, 's, E> -where - E: Writeable, -{ - fn write_to(&self, sink: &mut W) -> std::result::Result<(), std::fmt::Error> - where - W: std::fmt::Write + ?Sized, - { - for elem in &self.0 { - elem.write_to(sink)?; - } - Ok(()) - } -} - -impl<'i, 's, E> Display for InterpolatedPattern<'i, 's, E> -where - E: Display, -{ - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for elem in &self.0 { - write!(f, "{elem}")?; - } - Ok(()) - } -} - -impl<'i, 's, E> Deref for InterpolatedPattern<'i, 's, E> { - type Target = [InterpolatedKind<'i, 's, E>]; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} diff --git a/utils/pattern/src/replacement.rs b/utils/pattern/src/replacement.rs deleted file mode 100644 index 6dc04b6040d..00000000000 --- a/utils/pattern/src/replacement.rs +++ /dev/null @@ -1,157 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -use std::collections::HashMap; - -/// A trait which has to be implemented on any type that will be used to -/// provide replacement values for the placeholder pattern during interpolation. 
-/// -/// # Examples -/// -/// Since the trait comes with implementations for [`Vec`] and [`HashMap`][`HashMap`], -/// most common cases are already covered and manual implementation of the trait -/// is not needed. -/// -/// The consumer may want to implement it in less conventional cases where the replacements -/// are stored in a different data collection. -/// To illustrate such example, we'll use a `HashMap`, a map where keys value and position may -/// be disassociate. -/// -/// ``` -/// use icu_pattern::{ -/// InterpolatedKind, Interpolator, Parser, ParserOptions, Pattern, -/// ReplacementProvider, -/// }; -/// use std::collections::HashMap; -/// -/// #[derive(Debug, PartialEq)] -/// enum Element { -/// TokenZero, -/// TokenFive, -/// } -/// -/// impl<'r> ReplacementProvider<'r, Element> for HashMap> { -/// type Key = usize; -/// type Iter = std::slice::Iter<'r, Element>; -/// -/// fn take_replacement(&'r self, key: &usize) -> Option { -/// let replacement = self.get(key)?; -/// Some(replacement.iter()) -/// } -/// } -/// -/// let mut replacements = HashMap::new(); -/// replacements.insert(0, vec![Element::TokenZero]); -/// replacements.insert(5, vec![Element::TokenFive]); -/// -/// let pattern: Pattern<_> = Parser::new( -/// "{5}, {0}", -/// ParserOptions { -/// allow_raw_letters: false, -/// }, -/// ) -/// .try_into() -/// .unwrap(); -/// let mut interpolator = Interpolator::new(&pattern, &replacements); -/// -/// assert_eq!( -/// Ok(Some(InterpolatedKind::Element(&Element::TokenFive))), -/// interpolator.try_next() -/// ); -/// assert_eq!( -/// Ok(Some(InterpolatedKind::Literal(&", ".into()))), -/// interpolator.try_next() -/// ); -/// assert_eq!( -/// Ok(Some(InterpolatedKind::Element(&Element::TokenZero))), -/// interpolator.try_next() -/// ); -/// assert_eq!(Ok(None), interpolator.try_next()); -/// ``` -pub trait ReplacementProvider<'r, E: 'r> { - type Key; - type Iter: Iterator; - - /// Retrieves a replacement iterator to be used by the 
[`Interpolator`] in - /// place of a placeholder. - /// - /// # Examples - /// ``` - /// use icu_pattern::ReplacementProvider; - /// use std::collections::HashMap; - /// - /// #[derive(Debug, PartialEq)] - /// enum Element { - /// TokenFive, - /// } - /// - /// impl<'r> ReplacementProvider<'r, Element> for HashMap> { - /// type Key = usize; - /// type Iter = std::slice::Iter<'r, Element>; - /// - /// fn take_replacement(&'r self, key: &usize) -> Option { - /// let replacement = self.get(key)?; - /// Some(replacement.iter()) - /// } - /// } - /// - /// let mut replacements = HashMap::new(); - /// replacements.insert(5, vec![Element::TokenFive]); - /// - /// assert_eq!( - /// replacements.take_replacement(&5).map(|r| r.collect()), - /// Some(vec![&Element::TokenFive]) - /// ); - /// - /// assert_eq!( - /// replacements - /// .take_replacement(&1) - /// .map(|r| r.collect::>()), - /// None - /// ); - /// ``` - /// - /// [`Interpolator`]: crate::interpolator::Interpolator - fn take_replacement(&'r self, key: &Self::Key) -> Option; -} - -impl<'r, E: 'r> ReplacementProvider<'r, E> for Vec> { - type Key = usize; - type Iter = std::slice::Iter<'r, E>; - - fn take_replacement(&'r self, input: &usize) -> Option { - let replacement = self.get(*input)?; - Some(replacement.iter()) - } -} - -impl<'r, E: 'r> ReplacementProvider<'r, E> for Vec { - type Key = usize; - type Iter = std::iter::Once<&'r E>; - - fn take_replacement(&'r self, input: &usize) -> Option { - let replacement = self.get(*input)?; - Some(std::iter::once(replacement)) - } -} - -impl<'r, E: 'r> ReplacementProvider<'r, E> for HashMap> { - type Key = String; - type Iter = std::slice::Iter<'r, E>; - - fn take_replacement(&'r self, input: &String) -> Option { - let replacement = self.get(input)?; - Some(replacement.iter()) - } -} - -impl<'r, E: 'r> ReplacementProvider<'r, E> for HashMap { - type Key = String; - type Iter = std::iter::Once<&'r E>; - - fn take_replacement(&'r self, input: &String) -> Option { - let 
replacement = self.get(input)?; - Some(std::iter::once(replacement)) - } -} diff --git a/utils/pattern/src/single.rs b/utils/pattern/src/single.rs new file mode 100644 index 00000000000..43406ac6f9f --- /dev/null +++ b/utils/pattern/src/single.rs @@ -0,0 +1,323 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Code for the [`SinglePlaceholder`] pattern backend. + +use core::{cmp::Ordering, str::FromStr}; +use writeable::Writeable; + +use crate::common::*; +use crate::Error; + +#[cfg(feature = "alloc")] +use alloc::string::String; + +/// A singleton enum for the [`SinglePlaceholder`] pattern backend. +/// +/// # Examples +/// +/// ``` +/// use core::cmp::Ordering; +/// use icu_pattern::PatternItem; +/// use icu_pattern::SinglePlaceholder; +/// use icu_pattern::SinglePlaceholderKey; +/// use icu_pattern::SinglePlaceholderPattern; +/// +/// // Parse the string syntax and check the resulting data store: +/// let pattern = +/// SinglePlaceholderPattern::try_from_str("Hello, {0}!").unwrap(); +/// +/// assert_eq!( +/// pattern.iter().cmp( +/// [ +/// PatternItem::Literal("Hello, "), +/// PatternItem::Placeholder(SinglePlaceholderKey::Singleton), +/// PatternItem::Literal("!") +/// ] +/// .into_iter() +/// ), +/// Ordering::Equal +/// ); +/// ``` +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +#[allow(clippy::exhaustive_enums)] // Singleton +pub enum SinglePlaceholderKey { + Singleton, +} + +impl FromStr for SinglePlaceholderKey { + type Err = core::convert::Infallible; + fn from_str(_: &str) -> Result { + Ok(Self::Singleton) + } +} + +impl PlaceholderValueProvider for (W,) +where + W: Writeable, +{ + type W<'a> = &'a W where W: 'a; + fn value_for(&self, _key: SinglePlaceholderKey) -> Self::W<'_> { + &self.0 + } +} + +impl PlaceholderValueProvider for [W; 1] +where + W: Writeable, +{ + type 
W<'a> = &'a W where W: 'a; + fn value_for(&self, _key: SinglePlaceholderKey) -> Self::W<'_> { + let [value] = self; + value + } +} + +/// Backend for patterns containing zero or one placeholder. +/// +/// This empty type is not constructible. +/// +/// # Placeholder Keys +/// +/// The placeholder is always [`SinglePlaceholderKey::Singleton`]. +/// +/// In [`Pattern::interpolate()`], pass a single-element array or tuple. +/// +/// # Encoding Details +/// +/// The first code point of the string is 1 plus the byte offset of the placeholder counting from +/// after that initial code point. If zero, there is no placeholder. +/// +/// # Examples +/// +/// Parsing a pattern into the encoding: +/// +/// ``` +/// use icu_pattern::Pattern; +/// use icu_pattern::SinglePlaceholder; +/// +/// // Parse the string syntax and check the resulting data store: +/// let store = Pattern::::try_from_str("Hello, {0}!") +/// .unwrap() +/// .take_store(); +/// +/// assert_eq!("\u{8}Hello, !", store); +/// ``` +/// +/// Example patterns supported by this backend: +/// +/// ``` +/// use icu_pattern::Pattern; +/// use icu_pattern::SinglePlaceholder; +/// +/// // Single numeric placeholder: +/// assert_eq!( +/// Pattern::::try_from_str("{0} days ago") +/// .unwrap() +/// .interpolate_to_string([5]), +/// "5 days ago", +/// ); +/// +/// // Single named placeholder: +/// assert_eq!( +/// Pattern::::try_from_str("{name}") +/// .unwrap() +/// .interpolate_to_string(["Alice"]), +/// "Alice", +/// ); +/// +/// // No placeholder (note, the placeholder value is never accessed): +/// assert_eq!( +/// Pattern::::try_from_str("yesterday") +/// .unwrap() +/// .interpolate_to_string(["hi"]), +/// "yesterday", +/// ); +/// +/// // Escaped placeholder and a real placeholder: +/// assert_eq!( +/// Pattern::::try_from_str("'{0}' {1}") +/// .unwrap() +/// .interpolate_to_string(("hi",)), +/// "{0} hi", +/// ); +/// ``` +/// +/// [`Pattern::interpolate()`]: crate::Pattern::interpolate +#[derive(Debug)] 
+#[allow(clippy::exhaustive_enums)] // Empty Enum +pub enum SinglePlaceholder {} + +impl crate::private::Sealed for SinglePlaceholder {} + +impl PatternBackend for SinglePlaceholder { + type PlaceholderKey = SinglePlaceholderKey; + type Store = str; + type Iter<'a> = SinglePlaceholderPatternIterator<'a>; + + fn validate_store(store: &Self::Store) -> Result<(), Error> { + let placeholder_offset_char = store.chars().next().ok_or(Error::InvalidPattern)?; + let initial_offset = placeholder_offset_char.len_utf8(); + let placeholder_offset = placeholder_offset_char as usize; + if placeholder_offset > store.len() - initial_offset + 1 { + return Err(Error::InvalidPattern); + } + if placeholder_offset >= 0xD800 { + return Err(Error::InvalidPattern); + } + Ok(()) + } + + fn iter_items(store: &Self::Store) -> Self::Iter<'_> { + let placeholder_offset_char = match store.chars().next() { + Some(i) => i, + None => { + debug_assert!(false); + '\0' + } + }; + let initial_offset = placeholder_offset_char.len_utf8(); + SinglePlaceholderPatternIterator { + store, + placeholder_offset: placeholder_offset_char as usize + initial_offset - 1, + current_offset: initial_offset, + } + } + + #[cfg(feature = "alloc")] + fn try_from_items< + 'a, + I: Iterator, Error>>, + >( + items: I, + ) -> Result { + let mut result = String::new(); + let mut seen_placeholder = false; + for item in items { + match item? 
{ + PatternItemCow::Literal(s) => result.push_str(&s), + PatternItemCow::Placeholder(_) if !seen_placeholder => { + seen_placeholder = true; + let placeholder_offset = + u32::try_from(result.len() + 1).map_err(|_| Error::InvalidPattern)?; + if placeholder_offset >= 0xD800 { + return Err(Error::InvalidPattern); + } + let placeholder_offset_char = + char::try_from(placeholder_offset).map_err(|_| Error::InvalidPattern)?; + result.insert(0, placeholder_offset_char); + } + PatternItemCow::Placeholder(_) => { + return Err(Error::InvalidPattern); + } + } + } + if !seen_placeholder { + result.insert(0, '\0'); + } + Ok(result) + } +} + +#[doc(hidden)] // TODO(#4467): Should be internal +#[derive(Debug)] +pub struct SinglePlaceholderPatternIterator<'a> { + store: &'a str, + placeholder_offset: usize, + current_offset: usize, +} + +// Note: This impl is not exported via public bounds, but it might be in the +// future, and the compiler might be able to find it. The code is also +// reachable from `Iterator::size_hint`. 
+impl ExactSizeIterator for SinglePlaceholderPatternIterator<'_> { + fn len(&self) -> usize { + let placeholder_offset_char = match self.store.chars().next() { + Some(i) => i, + None => { + debug_assert!(false); + '\0' + } + }; + let initial_offset = placeholder_offset_char.len_utf8(); + let placeholder_offset = placeholder_offset_char as usize + initial_offset - 1; + let store_len = self.store.len(); + if placeholder_offset < initial_offset { + // No placeholder + if initial_offset < store_len { + // No placeholder, non-empty literal + 1 + } else { + // No placeholder, empty literal + 0 + } + } else if placeholder_offset == initial_offset { + // Has placeholder, empty prefix + if initial_offset < store_len { + // Has placeholder, empty prefix, non-empty suffix + 2 + } else { + // Has placeholder, empty prefix, empty suffix + 1 + } + } else if placeholder_offset < store_len { + // Has placeholder, non-empty prefix, non-empty suffix + 3 + } else { + // Has placeholder, non-empty prefix, empty suffix + 2 + } + } +} + +impl<'a> Iterator for SinglePlaceholderPatternIterator<'a> { + type Item = PatternItem<'a, SinglePlaceholderKey>; + fn next(&mut self) -> Option { + match self.current_offset.cmp(&self.placeholder_offset) { + Ordering::Less => { + // Prefix + let literal_str = match self.store.get(self.current_offset..self.placeholder_offset) + { + Some(s) => s, + None => { + debug_assert!(false, "offsets are in range"); + "" + } + }; + self.current_offset = self.placeholder_offset; + Some(PatternItem::Literal(literal_str)) + } + Ordering::Equal => { + // Placeholder + self.placeholder_offset = 0; + Some(PatternItem::Placeholder(SinglePlaceholderKey::Singleton)) + } + Ordering::Greater => { + // Suffix or end of string + let literal_str = match self.store.get(self.current_offset..) 
{ + Some(s) => s, + None => { + debug_assert!(false, "offsets are in range"); + "" + } + }; + if literal_str.is_empty() { + // End of string + None + } else { + // Suffix + self.current_offset = self.store.len(); + Some(PatternItem::Literal(literal_str)) + } + } + } + } + + fn size_hint(&self) -> (usize, Option) { + let len = self.len(); + (len, Some(len)) + } +} + +// TODO(#1668): Add more tests