From 3b1906b2c0a82697750a2e4453e871ba26b966bb Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 4 Jul 2017 15:42:32 +0200 Subject: [PATCH 01/15] Replace Box with Rc in CompactCowStr. This make the Clone impl never allocate. --- Cargo.toml | 2 +- src/compact_cow_str.rs | 171 ++++++++++++++++++++--------------------- 2 files changed, 86 insertions(+), 87 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index eba9e089..4ff74f56 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cssparser" -version = "0.17.0" +version = "0.18.0" authors = [ "Simon Sapin " ] description = "Rust implementation of CSS Syntax Level 3" diff --git a/src/compact_cow_str.rs b/src/compact_cow_str.rs index c57d24da..aa65b4f8 100644 --- a/src/compact_cow_str.rs +++ b/src/compact_cow_str.rs @@ -9,106 +9,97 @@ use std::hash; use std::marker::PhantomData; use std::mem; use std::ops::Deref; +use std::rc::Rc; use std::slice; use std::str; - -// All bits set except the highest -const MAX_LEN: usize = !0 >> 1; - -// Only the highest bit -const OWNED_TAG: usize = MAX_LEN + 1; - -/// Like `Cow<'a, str>`, but with smaller `std::mem::size_of`. (Two words instead of four.) +use std::usize; + +/// A string that is either shared (heap-allocated and reference-counted) or borrowed. +/// +/// Equivalent to `enum { Borrowed(&'a str), Shared(Rc) }`, but stored more compactly. +/// +/// FIXME(https://github.com/rust-lang/rfcs/issues/1230): use an actual enum if/when +/// the compiler can do this layout optimization. pub struct CompactCowStr<'a> { - // `tagged_len` is a tag in its highest bit, and the string length in the rest of the bits. - // - // * If the tag is 1, the memory pointed to by `ptr` is owned - // and the lifetime parameter is irrelevant. - // `ptr` and `len` are the components of a `Box`. - // - // * If the tag is 0, the memory is borrowed. - // `ptr` and `len` are the components of a `&'a str`. + /// FIXME: https://github.com/rust-lang/rust/issues/27730 use NonZero or Shared + /// In the meantime we abuse `&'static _` to get the effect of `NonZero<*const _>`. + /// `ptr` doesn’t really have the 'static lifetime! + ptr: &'static (), + + /// * If `borrowed_len_or_max == usize::MAX`, then `ptr` represents `NonZero<*const String>` + /// from `Rc::into_raw`. + /// The lifetime parameter `'a` is irrelevant in this case. + /// + /// * Otherwise, `ptr` represents the `NonZero<*const u8>` data component of `&'a str`, + /// and `borrowed_len_or_max` its length. + borrowed_len_or_max: usize, - // FIXME: https://github.com/rust-lang/rust/issues/27730 use NonZero or Shared - ptr: *const u8, - tagged_len: usize, - phantom: PhantomData<&'a str>, + phantom: PhantomData>>, +} + +fn _static_assert_same_size<'a>() { + // "Instantiate" the generic function without calling it. 
+ let _ = mem::transmute::, Option>>; } impl<'a> From<&'a str> for CompactCowStr<'a> { #[inline] fn from(s: &'a str) -> Self { let len = s.len(); - assert!(len <= MAX_LEN); + assert!(len < usize::MAX); CompactCowStr { - ptr: s.as_ptr(), - tagged_len: len, + ptr: unsafe { &*(s.as_ptr() as *const ()) }, + borrowed_len_or_max: len, phantom: PhantomData, } } } -impl<'a> From> for CompactCowStr<'a> { +impl<'a> From> for CompactCowStr<'a> { #[inline] - fn from(s: Box) -> Self { - let ptr = s.as_ptr(); - let len = s.len(); - assert!(len <= MAX_LEN); - mem::forget(s); + fn from(s: Rc) -> Self { + let ptr = unsafe { &*(Rc::into_raw(s) as *const ()) }; CompactCowStr { ptr: ptr, - tagged_len: len | OWNED_TAG, + borrowed_len_or_max: usize::MAX, phantom: PhantomData, } } } impl<'a> CompactCowStr<'a> { - /// Whether this string refers to borrowed memory - /// (as opposed to owned, which would be freed when `CompactCowStr` goes out of scope). - #[inline] - pub fn is_borrowed(&self) -> bool { - (self.tagged_len & OWNED_TAG) == 0 - } - - /// The length of this string - #[inline] - pub fn len(&self) -> usize { - self.tagged_len & !OWNED_TAG - } - - // Intentionally private since it is easy to use incorrectly. #[inline] - fn as_raw_str(&self) -> *const str { - unsafe { - str::from_utf8_unchecked(slice::from_raw_parts(self.ptr, self.len())) + fn unpack(&self) -> Result<&'a str, *const String> { + if self.borrowed_len_or_max == usize::MAX { + Err(self.ptr as *const () as *const String) + } else { + unsafe { + Ok(str::from_utf8_unchecked(slice::from_raw_parts( + self.ptr as *const () as *const u8, + self.borrowed_len_or_max, + ))) + } } } - /// If this string is borrowed, return a slice with the original lifetime, - /// not borrowing `self`. - /// - /// (`Deref` is implemented unconditionally, but returns a slice with a shorter lifetime.) #[inline] - pub fn as_str(&self) -> Option<&'a str> { - if self.is_borrowed() { - Some(unsafe { &*self.as_raw_str() }) - } else { - None - } + fn into_enum(self) -> Result<&'a str, Rc> { + self.unpack().map_err(|ptr| { + mem::forget(self); + unsafe { + Rc::from_raw(ptr) + } + }) } - /// Convert into `String`, re-using the memory allocation if it was already owned. + /// Convert into `String`, re-using an existing memory allocation if possible. 
#[inline] pub fn into_owned(self) -> String { - unsafe { - let raw = self.as_raw_str(); - let is_borrowed = self.is_borrowed(); - mem::forget(self); - if is_borrowed { - String::from(&*raw) - } else { - Box::from_raw(raw as *mut str).into_string() + match self.into_enum() { + Ok(s) => s.to_owned(), + Err(rc) => match Rc::try_unwrap(rc) { + Ok(s) => s, + Err(rc) => (*rc).clone() } } } @@ -117,10 +108,18 @@ impl<'a> CompactCowStr<'a> { impl<'a> Clone for CompactCowStr<'a> { #[inline] fn clone(&self) -> Self { - if self.is_borrowed() { - CompactCowStr { ..*self } - } else { - Self::from(String::from(&**self).into_boxed_str()) + match self.unpack() { + Err(ptr) => { + let rc = unsafe { + Rc::from_raw(ptr) + }; + let new_rc = rc.clone(); + mem::forget(rc); // Don’t actually take ownership of this strong reference + new_rc.into() + } + Ok(_) => { + CompactCowStr { ..*self } + } } } } @@ -128,10 +127,10 @@ impl<'a> Clone for CompactCowStr<'a> { impl<'a> Drop for CompactCowStr<'a> { #[inline] fn drop(&mut self) { - if !self.is_borrowed() { - unsafe { - Box::from_raw(self.as_raw_str() as *mut str); - } + if let Err(ptr) = self.unpack() { + mem::drop(unsafe { + Rc::from_raw(ptr) + }) } } } @@ -141,23 +140,20 @@ impl<'a> Deref for CompactCowStr<'a> { #[inline] fn deref(&self) -> &str { - unsafe { - &*self.as_raw_str() - } + self.unpack().unwrap_or_else(|ptr| unsafe { + &**ptr + }) } } impl<'a> From> for Cow<'a, str> { #[inline] fn from(cow: CompactCowStr<'a>) -> Self { - unsafe { - let raw = cow.as_raw_str(); - let is_borrowed = cow.is_borrowed(); - mem::forget(cow); - if is_borrowed { - Cow::Borrowed(&*raw) - } else { - Cow::Owned(Box::from_raw(raw as *mut str).into_string()) + match cow.into_enum() { + Ok(s) => Cow::Borrowed(s), + Err(rc) => match Rc::try_unwrap(rc) { + Ok(s) => Cow::Owned(s), + Err(rc) => Cow::Owned((*rc).clone()) } } } @@ -166,7 +162,7 @@ impl<'a> From> for Cow<'a, str> { impl<'a> From for CompactCowStr<'a> { #[inline] fn from(s: String) -> Self { - Self::from(s.into_boxed_str()) + Self::from(Rc::new(s)) } } @@ -180,6 +176,9 @@ impl<'a> From> for CompactCowStr<'a> { } } + +// Boilerplate / trivial impls below. + impl<'a> AsRef for CompactCowStr<'a> { #[inline] fn as_ref(&self) -> &str { From f4cfca9ddaad444ac9e86237b30603acfa4a1e17 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 4 Jul 2017 15:43:56 +0200 Subject: [PATCH 02/15] Rename CompactCowStr to CowRcStr --- src/compact_cow_str.rs | 50 +++++++++++++++++------------------ src/lib.rs | 2 +- src/parser.rs | 16 +++++------ src/rules_and_declarations.rs | 8 +++--- src/size_of_tests.rs | 4 +-- src/tests.rs | 8 +++--- src/tokenizer.rs | 28 ++++++++++---------- 7 files changed, 58 insertions(+), 58 deletions(-) diff --git a/src/compact_cow_str.rs b/src/compact_cow_str.rs index aa65b4f8..1e1988f6 100644 --- a/src/compact_cow_str.rs +++ b/src/compact_cow_str.rs @@ -20,7 +20,7 @@ use std::usize; /// /// FIXME(https://github.com/rust-lang/rfcs/issues/1230): use an actual enum if/when /// the compiler can do this layout optimization. -pub struct CompactCowStr<'a> { +pub struct CowRcStr<'a> { /// FIXME: https://github.com/rust-lang/rust/issues/27730 use NonZero or Shared /// In the meantime we abuse `&'static _` to get the effect of `NonZero<*const _>`. /// `ptr` doesn’t really have the 'static lifetime! @@ -39,15 +39,15 @@ pub struct CompactCowStr<'a> { fn _static_assert_same_size<'a>() { // "Instantiate" the generic function without calling it. 
- let _ = mem::transmute::, Option>>; + let _ = mem::transmute::, Option>>; } -impl<'a> From<&'a str> for CompactCowStr<'a> { +impl<'a> From<&'a str> for CowRcStr<'a> { #[inline] fn from(s: &'a str) -> Self { let len = s.len(); assert!(len < usize::MAX); - CompactCowStr { + CowRcStr { ptr: unsafe { &*(s.as_ptr() as *const ()) }, borrowed_len_or_max: len, phantom: PhantomData, @@ -55,11 +55,11 @@ impl<'a> From<&'a str> for CompactCowStr<'a> { } } -impl<'a> From> for CompactCowStr<'a> { +impl<'a> From> for CowRcStr<'a> { #[inline] fn from(s: Rc) -> Self { let ptr = unsafe { &*(Rc::into_raw(s) as *const ()) }; - CompactCowStr { + CowRcStr { ptr: ptr, borrowed_len_or_max: usize::MAX, phantom: PhantomData, @@ -67,7 +67,7 @@ impl<'a> From> for CompactCowStr<'a> { } } -impl<'a> CompactCowStr<'a> { +impl<'a> CowRcStr<'a> { #[inline] fn unpack(&self) -> Result<&'a str, *const String> { if self.borrowed_len_or_max == usize::MAX { @@ -105,7 +105,7 @@ impl<'a> CompactCowStr<'a> { } } -impl<'a> Clone for CompactCowStr<'a> { +impl<'a> Clone for CowRcStr<'a> { #[inline] fn clone(&self) -> Self { match self.unpack() { @@ -118,13 +118,13 @@ impl<'a> Clone for CompactCowStr<'a> { new_rc.into() } Ok(_) => { - CompactCowStr { ..*self } + CowRcStr { ..*self } } } } } -impl<'a> Drop for CompactCowStr<'a> { +impl<'a> Drop for CowRcStr<'a> { #[inline] fn drop(&mut self) { if let Err(ptr) = self.unpack() { @@ -135,7 +135,7 @@ impl<'a> Drop for CompactCowStr<'a> { } } -impl<'a> Deref for CompactCowStr<'a> { +impl<'a> Deref for CowRcStr<'a> { type Target = str; #[inline] @@ -146,9 +146,9 @@ impl<'a> Deref for CompactCowStr<'a> { } } -impl<'a> From> for Cow<'a, str> { +impl<'a> From> for Cow<'a, str> { #[inline] - fn from(cow: CompactCowStr<'a>) -> Self { + fn from(cow: CowRcStr<'a>) -> Self { match cow.into_enum() { Ok(s) => Cow::Borrowed(s), Err(rc) => match Rc::try_unwrap(rc) { @@ -159,14 +159,14 @@ impl<'a> From> for Cow<'a, str> { } } -impl<'a> From for CompactCowStr<'a> { +impl<'a> From for CowRcStr<'a> { #[inline] fn from(s: String) -> Self { Self::from(Rc::new(s)) } } -impl<'a> From> for CompactCowStr<'a> { +impl<'a> From> for CowRcStr<'a> { #[inline] fn from(s: Cow<'a, str>) -> Self { match s { @@ -179,65 +179,65 @@ impl<'a> From> for CompactCowStr<'a> { // Boilerplate / trivial impls below. 
-impl<'a> AsRef for CompactCowStr<'a> { +impl<'a> AsRef for CowRcStr<'a> { #[inline] fn as_ref(&self) -> &str { self } } -impl<'a> Borrow for CompactCowStr<'a> { +impl<'a> Borrow for CowRcStr<'a> { #[inline] fn borrow(&self) -> &str { self } } -impl<'a> Default for CompactCowStr<'a> { +impl<'a> Default for CowRcStr<'a> { #[inline] fn default() -> Self { Self::from("") } } -impl<'a> hash::Hash for CompactCowStr<'a> { +impl<'a> hash::Hash for CowRcStr<'a> { #[inline] fn hash(&self, hasher: &mut H) { str::hash(self, hasher) } } -impl<'a, T: AsRef> PartialEq for CompactCowStr<'a> { +impl<'a, T: AsRef> PartialEq for CowRcStr<'a> { #[inline] fn eq(&self, other: &T) -> bool { str::eq(self, other.as_ref()) } } -impl<'a, T: AsRef> PartialOrd for CompactCowStr<'a> { +impl<'a, T: AsRef> PartialOrd for CowRcStr<'a> { #[inline] fn partial_cmp(&self, other: &T) -> Option { str::partial_cmp(self, other.as_ref()) } } -impl<'a> Eq for CompactCowStr<'a> {} +impl<'a> Eq for CowRcStr<'a> {} -impl<'a> Ord for CompactCowStr<'a> { +impl<'a> Ord for CowRcStr<'a> { #[inline] fn cmp(&self, other: &Self) -> cmp::Ordering { str::cmp(self, other) } } -impl<'a> fmt::Display for CompactCowStr<'a> { +impl<'a> fmt::Display for CowRcStr<'a> { #[inline] fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { str::fmt(self, formatter) } } -impl<'a> fmt::Debug for CompactCowStr<'a> { +impl<'a> fmt::Debug for CowRcStr<'a> { #[inline] fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { str::fmt(self, formatter) diff --git a/src/lib.rs b/src/lib.rs index f6a8a362..162d9443 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -91,7 +91,7 @@ pub use nth::parse_nth; pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenSerializationType}; pub use parser::{Parser, Delimiter, Delimiters, SourcePosition, ParseError, BasicParseError, ParserInput}; pub use unicode_range::UnicodeRange; -pub use compact_cow_str::CompactCowStr; +pub use compact_cow_str::CowRcStr; // For macros #[doc(hidden)] pub use macros::_internal__to_lowercase; diff --git a/src/parser.rs b/src/parser.rs index 5a38b7cc..100ee7a1 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,7 +2,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -use compact_cow_str::CompactCowStr; +use compact_cow_str::CowRcStr; use std::ops::Range; use std::ascii::AsciiExt; use std::ops::BitOr; @@ -28,7 +28,7 @@ pub enum BasicParseError<'a> { /// The end of the input was encountered unexpectedly. EndOfInput, /// An `@` rule was encountered that was invalid. - AtRuleInvalid(CompactCowStr<'a>), + AtRuleInvalid(CowRcStr<'a>), /// The body of an '@' rule was invalid. AtRuleBodyInvalid, /// A qualified rule was encountered that was invalid. @@ -446,7 +446,7 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Parse a and return the unescaped value. #[inline] - pub fn expect_ident(&mut self) -> Result, BasicParseError<'i>> { + pub fn expect_ident(&mut self) -> Result, BasicParseError<'i>> { match self.next()? { Token::Ident(value) => Ok(value), t => Err(BasicParseError::UnexpectedToken(t)) @@ -464,7 +464,7 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Parse a and return the unescaped value. #[inline] - pub fn expect_string(&mut self) -> Result, BasicParseError<'i>> { + pub fn expect_string(&mut self) -> Result, BasicParseError<'i>> { match self.next()? 
{ Token::QuotedString(value) => Ok(value), t => Err(BasicParseError::UnexpectedToken(t)) @@ -473,7 +473,7 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Parse either a or a , and return the unescaped value. #[inline] - pub fn expect_ident_or_string(&mut self) -> Result, BasicParseError<'i>> { + pub fn expect_ident_or_string(&mut self) -> Result, BasicParseError<'i>> { match self.next()? { Token::Ident(value) => Ok(value), Token::QuotedString(value) => Ok(value), @@ -483,7 +483,7 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Parse a and return the unescaped value. #[inline] - pub fn expect_url(&mut self) -> Result, BasicParseError<'i>> { + pub fn expect_url(&mut self) -> Result, BasicParseError<'i>> { match self.next()? { Token::UnquotedUrl(value) => Ok(value), Token::Function(ref name) if name.eq_ignore_ascii_case("url") => { @@ -496,7 +496,7 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Parse either a or a , and return the unescaped value. #[inline] - pub fn expect_url_or_string(&mut self) -> Result, BasicParseError<'i>> { + pub fn expect_url_or_string(&mut self) -> Result, BasicParseError<'i>> { match self.next()? { Token::UnquotedUrl(value) => Ok(value), Token::QuotedString(value) => Ok(value), @@ -612,7 +612,7 @@ impl<'i: 't, 't> Parser<'i, 't> { /// /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method. #[inline] - pub fn expect_function(&mut self) -> Result, BasicParseError<'i>> { + pub fn expect_function(&mut self) -> Result, BasicParseError<'i>> { match self.next()? { Token::Function(name) => Ok(name), t => Err(BasicParseError::UnexpectedToken(t)) diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index e7bdf29e..2c0e95e8 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -4,7 +4,7 @@ // https://drafts.csswg.org/css-syntax/#parsing -use compact_cow_str::CompactCowStr; +use compact_cow_str::CowRcStr; use parser::{parse_until_before, parse_until_after, parse_nested_block}; use std::ascii::AsciiExt; use std::ops::Range; @@ -72,7 +72,7 @@ pub trait DeclarationParser<'i> { /// If `!important` can be used in a given context, /// `input.try(parse_important).is_ok()` should be used at the end /// of the implementation of this method and the result should be part of the return value. - fn parse_value<'t>(&mut self, name: CompactCowStr<'i>, input: &mut Parser<'i, 't>) + fn parse_value<'t>(&mut self, name: CowRcStr<'i>, input: &mut Parser<'i, 't>) -> Result>; } @@ -112,7 +112,7 @@ pub trait AtRuleParser<'i> { /// The given `input` is a "delimited" parser /// that ends wherever the prelude should end. /// (Before the next semicolon, the next `{`, or the end of the current block.) - fn parse_prelude<'t>(&mut self, name: CompactCowStr<'i>, input: &mut Parser<'i, 't>) + fn parse_prelude<'t>(&mut self, name: CowRcStr<'i>, input: &mut Parser<'i, 't>) -> Result, ParseError<'i, Self::Error>> { let _ = name; let _ = input; @@ -407,7 +407,7 @@ pub struct PreciseParseError<'i, E: 'i> { pub span: Range, } -fn parse_at_rule<'i: 't, 't, P, E>(start_position: SourcePosition, name: CompactCowStr<'i>, +fn parse_at_rule<'i: 't, 't, P, E>(start_position: SourcePosition, name: CowRcStr<'i>, input: &mut Parser<'i, 't>, parser: &mut P) -> Result<
<P as AtRuleParser<'i>
>::AtRule, PreciseParseError<'i, E>> where P: AtRuleParser<'i, Error = E> { diff --git a/src/size_of_tests.rs b/src/size_of_tests.rs index ba4fbf62..237d30f5 100644 --- a/src/size_of_tests.rs +++ b/src/size_of_tests.rs @@ -2,7 +2,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -use compact_cow_str::CompactCowStr; +use compact_cow_str::CowRcStr; use std::borrow::Cow; use tokenizer::Token; @@ -34,4 +34,4 @@ macro_rules! size_of_test { // These assume 64-bit size_of_test!(token, Token, 32); size_of_test!(std_cow_str, Cow<'static, str>, 32); -size_of_test!(compact_cow_str, CompactCowStr, 16); +size_of_test!(compact_cow_str, CowRcStr, 16); diff --git a/src/tests.rs b/src/tests.rs index 454c1cb3..e3214238 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -16,7 +16,7 @@ use super::{Parser, Delimiter, Token, SourceLocation, ParseError, AtRuleType, AtRuleParser, QualifiedRuleParser, ParserInput, parse_one_declaration, parse_one_rule, parse_important, stylesheet_encoding, EncodingSupport, - TokenSerializationType, CompactCowStr, + TokenSerializationType, CowRcStr, Color, RGBA, parse_nth, UnicodeRange, ToCss}; macro_rules! JArray { @@ -294,7 +294,7 @@ fn unquoted_url_escaping() { #[test] fn test_expect_url() { - fn parse<'a>(s: &mut ParserInput<'a>) -> Result, BasicParseError<'a>> { + fn parse<'a>(s: &mut ParserInput<'a>) -> Result, BasicParseError<'a>> { Parser::new(s).expect_url() } let mut input = ParserInput::new("url()"); @@ -678,7 +678,7 @@ impl<'i> DeclarationParser<'i> for JsonParser { type Declaration = Json; type Error = (); - fn parse_value<'t>(&mut self, name: CompactCowStr<'i>, input: &mut Parser<'i, 't>) + fn parse_value<'t>(&mut self, name: CowRcStr<'i>, input: &mut Parser<'i, 't>) -> Result> { let mut value = vec![]; let mut important = false; @@ -719,7 +719,7 @@ impl<'i> AtRuleParser<'i> for JsonParser { type AtRule = Json; type Error = (); - fn parse_prelude<'t>(&mut self, name: CompactCowStr<'i>, input: &mut Parser<'i, 't>) + fn parse_prelude<'t>(&mut self, name: CowRcStr<'i>, input: &mut Parser<'i, 't>) -> Result, Json>, ParseError<'i, ()>> { Ok(AtRuleType::OptionalBlock(vec![ "at-rule".to_json(), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 2244e253..1703a113 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -10,7 +10,7 @@ use std::char; use std::ascii::AsciiExt; use std::i32; -use compact_cow_str::CompactCowStr; +use compact_cow_str::CowRcStr; use self::Token::*; @@ -22,32 +22,32 @@ use self::Token::*; pub enum Token<'a> { /// A [``](https://drafts.csswg.org/css-syntax/#ident-token-diagram) - Ident(CompactCowStr<'a>), + Ident(CowRcStr<'a>), /// A [``](https://drafts.csswg.org/css-syntax/#at-keyword-token-diagram) /// /// The value does not include the `@` marker. - AtKeyword(CompactCowStr<'a>), + AtKeyword(CowRcStr<'a>), /// A [``](https://drafts.csswg.org/css-syntax/#hash-token-diagram) with the type flag set to "unrestricted" /// /// The value does not include the `#` marker. - Hash(CompactCowStr<'a>), + Hash(CowRcStr<'a>), /// A [``](https://drafts.csswg.org/css-syntax/#hash-token-diagram) with the type flag set to "id" /// /// The value does not include the `#` marker. - IDHash(CompactCowStr<'a>), // Hash that is a valid ID selector. + IDHash(CowRcStr<'a>), // Hash that is a valid ID selector. /// A [``](https://drafts.csswg.org/css-syntax/#string-token-diagram) /// /// The value does not include the quotes. 
- QuotedString(CompactCowStr<'a>), + QuotedString(CowRcStr<'a>), /// A [``](https://drafts.csswg.org/css-syntax/#url-token-diagram) or `url( )` function /// /// The value does not include the `url(` `)` markers or the quotes. - UnquotedUrl(CompactCowStr<'a>), + UnquotedUrl(CowRcStr<'a>), /// A `` Delim(char), @@ -93,7 +93,7 @@ pub enum Token<'a> { int_value: Option, /// The unit, e.g. "px" in `12px` - unit: CompactCowStr<'a> + unit: CowRcStr<'a> }, /// A [``](https://drafts.csswg.org/css-syntax/#whitespace-token-diagram) @@ -143,7 +143,7 @@ pub enum Token<'a> { /// A [``](https://drafts.csswg.org/css-syntax/#function-token-diagram) /// /// The value (name) does not include the `(` marker. - Function(CompactCowStr<'a>), + Function(CowRcStr<'a>), /// A `<(-token>` ParenthesisBlock, @@ -157,12 +157,12 @@ pub enum Token<'a> { /// A `` /// /// This token always indicates a parse error. - BadUrl(CompactCowStr<'a>), + BadUrl(CowRcStr<'a>), /// A `` /// /// This token always indicates a parse error. - BadString(CompactCowStr<'a>), + BadString(CowRcStr<'a>), /// A `<)-token>` /// @@ -574,7 +574,7 @@ fn consume_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Toke /// Return `Err(())` on syntax error (ie. unescaped newline) fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) - -> Result, CompactCowStr<'a>> { + -> Result, CowRcStr<'a>> { tokenizer.advance(1); // Skip the initial quote // start_pos is at code point boundary, after " or ' let start_pos = tokenizer.position(); @@ -710,7 +710,7 @@ fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { } } -fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CompactCowStr<'a> { +fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowRcStr<'a> { // start_pos is the end of the previous token, therefore at a code point boundary let start_pos = tokenizer.position(); let mut value_bytes; @@ -1017,7 +1017,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result, ) } - fn consume_url_end<'a>(tokenizer: &mut Tokenizer<'a>, string: CompactCowStr<'a>) -> Token<'a> { + fn consume_url_end<'a>(tokenizer: &mut Tokenizer<'a>, string: CowRcStr<'a>) -> Token<'a> { while !tokenizer.is_eof() { match_byte! { tokenizer.consume_byte(), b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {}, From 32733fb570e732ace24d74b139006b2217808b6b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 4 Jul 2017 16:01:52 +0200 Subject: [PATCH 03/15] Remove CowRcStr APIs that might allocate and used to never do so. --- src/compact_cow_str.rs | 75 +++++++++++++----------------------------- 1 file changed, 23 insertions(+), 52 deletions(-) diff --git a/src/compact_cow_str.rs b/src/compact_cow_str.rs index 1e1988f6..fb976795 100644 --- a/src/compact_cow_str.rs +++ b/src/compact_cow_str.rs @@ -2,7 +2,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -use std::borrow::{Borrow, Cow}; +use std::borrow::Borrow; use std::cmp; use std::fmt; use std::hash; @@ -21,7 +21,7 @@ use std::usize; /// FIXME(https://github.com/rust-lang/rfcs/issues/1230): use an actual enum if/when /// the compiler can do this layout optimization. pub struct CowRcStr<'a> { - /// FIXME: https://github.com/rust-lang/rust/issues/27730 use NonZero or Shared + /// FIXME: https://github.com/rust-lang/rust/issues/27730 use NonZero or Shared. /// In the meantime we abuse `&'static _` to get the effect of `NonZero<*const _>`. /// `ptr` doesn’t really have the 'static lifetime! 
ptr: &'static (), @@ -55,9 +55,16 @@ impl<'a> From<&'a str> for CowRcStr<'a> { } } -impl<'a> From> for CowRcStr<'a> { +impl<'a> From for CowRcStr<'a> { + #[inline] + fn from(s: String) -> Self { + CowRcStr::from_rc(Rc::new(s)) + } +} + +impl<'a> CowRcStr<'a> { #[inline] - fn from(s: Rc) -> Self { + fn from_rc(s: Rc) -> Self { let ptr = unsafe { &*(Rc::into_raw(s) as *const ()) }; CowRcStr { ptr: ptr, @@ -65,9 +72,7 @@ impl<'a> From> for CowRcStr<'a> { phantom: PhantomData, } } -} -impl<'a> CowRcStr<'a> { #[inline] fn unpack(&self) -> Result<&'a str, *const String> { if self.borrowed_len_or_max == usize::MAX { @@ -82,24 +87,21 @@ impl<'a> CowRcStr<'a> { } } - #[inline] - fn into_enum(self) -> Result<&'a str, Rc> { - self.unpack().map_err(|ptr| { - mem::forget(self); - unsafe { - Rc::from_raw(ptr) - } - }) - } - /// Convert into `String`, re-using an existing memory allocation if possible. #[inline] pub fn into_owned(self) -> String { - match self.into_enum() { + let unpacked = self.unpack(); + + // Inhibit destructor: we’re taking ownership of this strong reference (if any) + mem::forget(self); + + match unpacked { Ok(s) => s.to_owned(), - Err(rc) => match Rc::try_unwrap(rc) { - Ok(s) => s, - Err(rc) => (*rc).clone() + Err(ptr) => { + let rc = unsafe { + Rc::from_raw(ptr) + }; + Rc::try_unwrap(rc).unwrap_or_else(|rc| (*rc).clone()) } } } @@ -115,7 +117,7 @@ impl<'a> Clone for CowRcStr<'a> { }; let new_rc = rc.clone(); mem::forget(rc); // Don’t actually take ownership of this strong reference - new_rc.into() + CowRcStr::from_rc(new_rc) } Ok(_) => { CowRcStr { ..*self } @@ -146,37 +148,6 @@ impl<'a> Deref for CowRcStr<'a> { } } -impl<'a> From> for Cow<'a, str> { - #[inline] - fn from(cow: CowRcStr<'a>) -> Self { - match cow.into_enum() { - Ok(s) => Cow::Borrowed(s), - Err(rc) => match Rc::try_unwrap(rc) { - Ok(s) => Cow::Owned(s), - Err(rc) => Cow::Owned((*rc).clone()) - } - } - } -} - -impl<'a> From for CowRcStr<'a> { - #[inline] - fn from(s: String) -> Self { - Self::from(Rc::new(s)) - } -} - -impl<'a> From> for CowRcStr<'a> { - #[inline] - fn from(s: Cow<'a, str>) -> Self { - match s { - Cow::Borrowed(s) => Self::from(s), - Cow::Owned(s) => Self::from(s), - } - } -} - - // Boilerplate / trivial impls below. impl<'a> AsRef for CowRcStr<'a> { From 8b7191b99ec463bcace35008802bff90f88d8305 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 18 Jul 2017 09:19:00 +0200 Subject: [PATCH 04/15] Add `From> for CowRcStr` --- src/compact_cow_str.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/compact_cow_str.rs b/src/compact_cow_str.rs index fb976795..3b562d7f 100644 --- a/src/compact_cow_str.rs +++ b/src/compact_cow_str.rs @@ -2,7 +2,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -use std::borrow::Borrow; +use std::borrow::{Borrow, Cow}; use std::cmp; use std::fmt; use std::hash; @@ -42,6 +42,16 @@ fn _static_assert_same_size<'a>() { let _ = mem::transmute::, Option>>; } +impl<'a> From> for CowRcStr<'a> { + #[inline] + fn from(s: Cow<'a, str>) -> Self { + match s { + Cow::Borrowed(s) => CowRcStr::from(s), + Cow::Owned(s) => CowRcStr::from(s), + } + } +} + impl<'a> From<&'a str> for CowRcStr<'a> { #[inline] fn from(s: &'a str) -> Self { From c661e7c472bb2dba3b55c5e9d86f5a51842c96ff Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 18 Jul 2017 09:50:42 +0200 Subject: [PATCH 05/15] Rename compact_cow_str.rs to cow_rc_str.rs --- src/{compact_cow_str.rs => cow_rc_str.rs} | 0 src/lib.rs | 4 ++-- src/parser.rs | 2 +- src/rules_and_declarations.rs | 2 +- src/size_of_tests.rs | 4 ++-- src/tokenizer.rs | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) rename src/{compact_cow_str.rs => cow_rc_str.rs} (100%) diff --git a/src/compact_cow_str.rs b/src/cow_rc_str.rs similarity index 100% rename from src/compact_cow_str.rs rename to src/cow_rc_str.rs diff --git a/src/lib.rs b/src/lib.rs index 162d9443..94c81fb5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -91,7 +91,7 @@ pub use nth::parse_nth; pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenSerializationType}; pub use parser::{Parser, Delimiter, Delimiters, SourcePosition, ParseError, BasicParseError, ParserInput}; pub use unicode_range::UnicodeRange; -pub use compact_cow_str::CowRcStr; +pub use cow_rc_str::CowRcStr; // For macros #[doc(hidden)] pub use macros::_internal__to_lowercase; @@ -117,7 +117,7 @@ mod color; mod nth; mod serializer; mod unicode_range; -mod compact_cow_str; +mod cow_rc_str; #[cfg(test)] mod tests; #[cfg(test)] mod size_of_tests; diff --git a/src/parser.rs b/src/parser.rs index 100ee7a1..3c08d212 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,7 +2,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -use compact_cow_str::CowRcStr; +use cow_rc_str::CowRcStr; use std::ops::Range; use std::ascii::AsciiExt; use std::ops::BitOr; diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index 2c0e95e8..24af7441 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -4,7 +4,7 @@ // https://drafts.csswg.org/css-syntax/#parsing -use compact_cow_str::CowRcStr; +use cow_rc_str::CowRcStr; use parser::{parse_until_before, parse_until_after, parse_nested_block}; use std::ascii::AsciiExt; use std::ops::Range; diff --git a/src/size_of_tests.rs b/src/size_of_tests.rs index 237d30f5..13effc90 100644 --- a/src/size_of_tests.rs +++ b/src/size_of_tests.rs @@ -2,7 +2,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -use compact_cow_str::CowRcStr; +use cow_rc_str::CowRcStr; use std::borrow::Cow; use tokenizer::Token; @@ -34,4 +34,4 @@ macro_rules! 
size_of_test { // These assume 64-bit size_of_test!(token, Token, 32); size_of_test!(std_cow_str, Cow<'static, str>, 32); -size_of_test!(compact_cow_str, CowRcStr, 16); +size_of_test!(cow_rc_str, CowRcStr, 16); diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 1703a113..205ef41c 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -10,7 +10,7 @@ use std::char; use std::ascii::AsciiExt; use std::i32; -use compact_cow_str::CowRcStr; +use cow_rc_str::CowRcStr; use self::Token::*; From 4aeca596551ba5c336bca5bb2e2509b651691dde Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 17 Jul 2017 16:42:45 +0200 Subject: [PATCH 06/15] Remove unnecessary parenthesis --- src/parser.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 3c08d212..70f47dae 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -223,7 +223,7 @@ impl<'i: 't, 't> Parser<'i, 't> { #[inline] pub fn position(&self) -> SourcePosition { SourcePosition { - position: (self.tokenizer.0).position(), + position: self.tokenizer.0.position(), at_start_of: self.at_start_of, } } @@ -234,35 +234,35 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Should only be used with `SourcePosition` values from the same `Parser` instance. #[inline] pub fn reset(&mut self, new_position: SourcePosition) { - (self.tokenizer.0).reset(new_position.position); + self.tokenizer.0.reset(new_position.position); self.at_start_of = new_position.at_start_of; } /// Start looking for `var()` functions. (See the `.seen_var_functions()` method.) #[inline] pub fn look_for_var_functions(&mut self) { - (self.tokenizer.0).look_for_var_functions() + self.tokenizer.0.look_for_var_functions() } /// Return whether a `var()` function has been seen by the tokenizer since /// either `look_for_var_functions` was called, and stop looking. #[inline] pub fn seen_var_functions(&mut self) -> bool { - (self.tokenizer.0).seen_var_functions() + self.tokenizer.0.seen_var_functions() } /// Start looking for viewport percentage lengths. (See the `seen_viewport_percentages` /// method.) #[inline] pub fn look_for_viewport_percentages(&mut self) { - (self.tokenizer.0).look_for_viewport_percentages() + self.tokenizer.0.look_for_viewport_percentages() } /// Return whether a `vh`, `vw`, `vmin`, or `vmax` dimension has been seen by the tokenizer /// since `look_for_viewport_percentages` was called, and stop looking. #[inline] pub fn seen_viewport_percentages(&mut self) -> bool { - (self.tokenizer.0).seen_viewport_percentages() + self.tokenizer.0.seen_viewport_percentages() } /// Execute the given closure, passing it the parser. @@ -283,25 +283,25 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Return a slice of the CSS input #[inline] pub fn slice(&self, range: Range) -> &'i str { - (self.tokenizer.0).slice(range.start.position..range.end.position) + self.tokenizer.0.slice(range.start.position..range.end.position) } /// Return a slice of the CSS input, from the given position to the current one. #[inline] pub fn slice_from(&self, start_position: SourcePosition) -> &'i str { - (self.tokenizer.0).slice_from(start_position.position) + self.tokenizer.0.slice_from(start_position.position) } /// Return the line and column number within the input for the current position. #[inline] pub fn current_source_location(&self) -> SourceLocation { - (self.tokenizer.0).current_source_location() + self.tokenizer.0.current_source_location() } /// Return the line and column number within the input for the given position. 
#[inline] pub fn source_location(&self, target: SourcePosition) -> SourceLocation { - (self.tokenizer.0).source_location(target.position) + self.tokenizer.0.source_location(target.position) } /// Return the next token in the input that is neither whitespace or a comment, @@ -344,11 +344,11 @@ impl<'i: 't, 't> Parser<'i, 't> { if let Some(block_type) = self.at_start_of.take() { consume_until_end_of_block(block_type, &mut self.tokenizer.0); } - let byte = (self.tokenizer.0).next_byte(); + let byte = self.tokenizer.0.next_byte(); if self.stop_before.contains(Delimiters::from_byte(byte)) { return Err(BasicParseError::EndOfInput) } - let token = (self.tokenizer.0).next().map_err(|()| BasicParseError::EndOfInput)?; + let token = self.tokenizer.0.next().map_err(|()| BasicParseError::EndOfInput)?; if let Some(block_type) = BlockType::opening(&token) { self.at_start_of = Some(block_type); } From 49e8ab477bf9617a95d63e99e298ec2967cbf14f Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 17 Jul 2017 17:10:24 +0200 Subject: [PATCH 07/15] Rename private struct fields in ParserInput and Parser --- src/parser.rs | 62 +++++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 70f47dae..81da8c61 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -61,12 +61,16 @@ impl<'a, T> ParseError<'a, T> { } /// The owned input for a parser. -pub struct ParserInput<'t>(Tokenizer<'t>); +pub struct ParserInput<'t> { + tokenizer: Tokenizer<'t>, +} impl<'t> ParserInput<'t> { /// Create a new input for a parser. pub fn new(input: &'t str) -> ParserInput<'t> { - ParserInput(Tokenizer::new(input)) + ParserInput { + tokenizer: Tokenizer::new(input), + } } } @@ -74,7 +78,7 @@ impl<'t> ParserInput<'t> { /// yields `Token`s, /// and keeps track of nested blocks and functions. pub struct Parser<'i: 't, 't> { - tokenizer: &'t mut ParserInput<'i>, + input: &'t mut ParserInput<'i>, /// If `Some(_)`, .parse_nested_block() can be called. at_start_of: Option, /// For parsers from `parse_until` or `parse_nested_block` @@ -182,7 +186,7 @@ impl<'i: 't, 't> Parser<'i, 't> { #[inline] pub fn new(input: &'t mut ParserInput<'i>) -> Parser<'i, 't> { Parser { - tokenizer: input, + input: input, at_start_of: None, stop_before: Delimiter::None, } @@ -190,7 +194,7 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Return the current line that is being parsed. pub fn current_line(&self) -> &'i str { - self.tokenizer.0.current_source_line() + self.input.tokenizer.current_source_line() } /// Check whether the input is exhausted. That is, if `.next()` would return a token. @@ -223,7 +227,7 @@ impl<'i: 't, 't> Parser<'i, 't> { #[inline] pub fn position(&self) -> SourcePosition { SourcePosition { - position: self.tokenizer.0.position(), + position: self.input.tokenizer.position(), at_start_of: self.at_start_of, } } @@ -234,35 +238,35 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Should only be used with `SourcePosition` values from the same `Parser` instance. #[inline] pub fn reset(&mut self, new_position: SourcePosition) { - self.tokenizer.0.reset(new_position.position); + self.input.tokenizer.reset(new_position.position); self.at_start_of = new_position.at_start_of; } /// Start looking for `var()` functions. (See the `.seen_var_functions()` method.) 
#[inline] pub fn look_for_var_functions(&mut self) { - self.tokenizer.0.look_for_var_functions() + self.input.tokenizer.look_for_var_functions() } /// Return whether a `var()` function has been seen by the tokenizer since /// either `look_for_var_functions` was called, and stop looking. #[inline] pub fn seen_var_functions(&mut self) -> bool { - self.tokenizer.0.seen_var_functions() + self.input.tokenizer.seen_var_functions() } /// Start looking for viewport percentage lengths. (See the `seen_viewport_percentages` /// method.) #[inline] pub fn look_for_viewport_percentages(&mut self) { - self.tokenizer.0.look_for_viewport_percentages() + self.input.tokenizer.look_for_viewport_percentages() } /// Return whether a `vh`, `vw`, `vmin`, or `vmax` dimension has been seen by the tokenizer /// since `look_for_viewport_percentages` was called, and stop looking. #[inline] pub fn seen_viewport_percentages(&mut self) -> bool { - self.tokenizer.0.seen_viewport_percentages() + self.input.tokenizer.seen_viewport_percentages() } /// Execute the given closure, passing it the parser. @@ -283,25 +287,25 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Return a slice of the CSS input #[inline] pub fn slice(&self, range: Range) -> &'i str { - self.tokenizer.0.slice(range.start.position..range.end.position) + self.input.tokenizer.slice(range.start.position..range.end.position) } /// Return a slice of the CSS input, from the given position to the current one. #[inline] pub fn slice_from(&self, start_position: SourcePosition) -> &'i str { - self.tokenizer.0.slice_from(start_position.position) + self.input.tokenizer.slice_from(start_position.position) } /// Return the line and column number within the input for the current position. #[inline] pub fn current_source_location(&self) -> SourceLocation { - self.tokenizer.0.current_source_location() + self.input.tokenizer.current_source_location() } /// Return the line and column number within the input for the given position. #[inline] pub fn source_location(&self, target: SourcePosition) -> SourceLocation { - self.tokenizer.0.source_location(target.position) + self.input.tokenizer.source_location(target.position) } /// Return the next token in the input that is neither whitespace or a comment, @@ -342,13 +346,13 @@ impl<'i: 't, 't> Parser<'i, 't> { /// comments should always be ignored between tokens. 
pub fn next_including_whitespace_and_comments(&mut self) -> Result, BasicParseError<'i>> { if let Some(block_type) = self.at_start_of.take() { - consume_until_end_of_block(block_type, &mut self.tokenizer.0); + consume_until_end_of_block(block_type, &mut self.input.tokenizer); } - let byte = self.tokenizer.0.next_byte(); + let byte = self.input.tokenizer.next_byte(); if self.stop_before.contains(Delimiters::from_byte(byte)) { return Err(BasicParseError::EndOfInput) } - let token = self.tokenizer.0.next().map_err(|()| BasicParseError::EndOfInput)?; + let token = self.input.tokenizer.next().map_err(|()| BasicParseError::EndOfInput)?; if let Some(block_type) = BlockType::opening(&token) { self.at_start_of = Some(block_type); } @@ -665,23 +669,23 @@ pub fn parse_until_before<'i: 't, 't, F, T, E>(parser: &mut Parser<'i, 't>, // Introduce a new scope to limit duration of nested_parser’s borrow { let mut delimited_parser = Parser { - tokenizer: parser.tokenizer, + input: parser.input, at_start_of: parser.at_start_of.take(), stop_before: delimiters, }; result = delimited_parser.parse_entirely(parse); if let Some(block_type) = delimited_parser.at_start_of { - consume_until_end_of_block(block_type, &mut delimited_parser.tokenizer.0); + consume_until_end_of_block(block_type, &mut delimited_parser.input.tokenizer); } } // FIXME: have a special-purpose tokenizer method for this that does less work. loop { - if delimiters.contains(Delimiters::from_byte((parser.tokenizer.0).next_byte())) { + if delimiters.contains(Delimiters::from_byte((parser.input.tokenizer).next_byte())) { break } - if let Ok(token) = (parser.tokenizer.0).next() { + if let Ok(token) = (parser.input.tokenizer).next() { if let Some(block_type) = BlockType::opening(&token) { - consume_until_end_of_block(block_type, &mut parser.tokenizer.0); + consume_until_end_of_block(block_type, &mut parser.input.tokenizer); } } else { break @@ -696,12 +700,12 @@ pub fn parse_until_after<'i: 't, 't, F, T, E>(parser: &mut Parser<'i, 't>, -> Result > where F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result> { let result = parser.parse_until_before(delimiters, parse); - let next_byte = (parser.tokenizer.0).next_byte(); + let next_byte = (parser.input.tokenizer).next_byte(); if next_byte.is_some() && !parser.stop_before.contains(Delimiters::from_byte(next_byte)) { debug_assert!(delimiters.contains(Delimiters::from_byte(next_byte))); - (parser.tokenizer.0).advance(1); + (parser.input.tokenizer).advance(1); if next_byte == Some(b'{') { - consume_until_end_of_block(BlockType::CurlyBracket, &mut parser.tokenizer.0); + consume_until_end_of_block(BlockType::CurlyBracket, &mut parser.input.tokenizer); } } result @@ -724,16 +728,16 @@ pub fn parse_nested_block<'i: 't, 't, F, T, E>(parser: &mut Parser<'i, 't>, pars // Introduce a new scope to limit duration of nested_parser’s borrow { let mut nested_parser = Parser { - tokenizer: parser.tokenizer, + input: parser.input, at_start_of: None, stop_before: closing_delimiter, }; result = nested_parser.parse_entirely(parse); if let Some(block_type) = nested_parser.at_start_of { - consume_until_end_of_block(block_type, &mut nested_parser.tokenizer.0); + consume_until_end_of_block(block_type, &mut nested_parser.input.tokenizer); } } - consume_until_end_of_block(block_type, &mut parser.tokenizer.0); + consume_until_end_of_block(block_type, &mut parser.input.tokenizer); result } From b6072ca53148cf4eba1cdb02f4cc8378cafda104 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 17 Jul 2017 17:39:39 +0200 Subject: [PATCH 08/15] 
Add a one-token cache. This will avoid duplicating tokenization work, hopefully in most uses of `Parser::try`. --- src/parser.rs | 56 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 81da8c61..25620280 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -61,15 +61,23 @@ impl<'a, T> ParseError<'a, T> { } /// The owned input for a parser. -pub struct ParserInput<'t> { - tokenizer: Tokenizer<'t>, +pub struct ParserInput<'i> { + tokenizer: Tokenizer<'i>, + cached_token: Option>, } -impl<'t> ParserInput<'t> { +struct CachedToken<'i> { + token: Token<'i>, + start_position: tokenizer::SourcePosition, + end_position: tokenizer::SourcePosition, +} + +impl<'i> ParserInput<'i> { /// Create a new input for a parser. - pub fn new(input: &'t str) -> ParserInput<'t> { + pub fn new(input: &'i str) -> ParserInput<'i> { ParserInput { tokenizer: Tokenizer::new(input), + cached_token: None, } } } @@ -348,11 +356,49 @@ impl<'i: 't, 't> Parser<'i, 't> { if let Some(block_type) = self.at_start_of.take() { consume_until_end_of_block(block_type, &mut self.input.tokenizer); } + let byte = self.input.tokenizer.next_byte(); if self.stop_before.contains(Delimiters::from_byte(byte)) { return Err(BasicParseError::EndOfInput) } - let token = self.input.tokenizer.next().map_err(|()| BasicParseError::EndOfInput)?; + + let token_start_position = self.input.tokenizer.position(); + let token; + match self.input.cached_token { + Some(ref cached_token) if cached_token.start_position == token_start_position => { + self.input.tokenizer.reset(cached_token.end_position); + token = cached_token.token.clone(); + } + _ => { + token = self.input.tokenizer.next().map_err(|()| BasicParseError::EndOfInput)?; + match token { + // Don’t cache whitespace or comment tokens. + // A typical pattern is: + // + // ``` + // parser.try(|parser| { + // match parser.next() { … } + // }).or_else(|| { + // match parser.next() { … } + // }) + // ``` + // + // If the curren position at the start of this code is at a whitespace token, + // the "interesting" token (returned by `next`) comes later. + // So in the second call to `next`, we don’t want "uninteresting" tokens + // to overwrite the cache. + Token::WhiteSpace(_) | Token::Comment(_) => {} + _ => { + self.input.cached_token = Some(CachedToken { + token: token.clone(), + start_position: token_start_position, + end_position: self.input.tokenizer.position(), + }) + } + } + } + } + if let Some(block_type) = BlockType::opening(&token) { self.at_start_of = Some(block_type); } From 47053936cef8dc5d0b616ff664d65783e39fce14 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 18 Jul 2017 15:48:52 +0200 Subject: [PATCH 09/15] Token cache heuristic: monotonic position rather than token type. --- src/parser.rs | 47 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 25620280..38c944af 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -371,30 +371,29 @@ impl<'i: 't, 't> Parser<'i, 't> { } _ => { token = self.input.tokenizer.next().map_err(|()| BasicParseError::EndOfInput)?; - match token { - // Don’t cache whitespace or comment tokens. 
- // A typical pattern is: - // - // ``` - // parser.try(|parser| { - // match parser.next() { … } - // }).or_else(|| { - // match parser.next() { … } - // }) - // ``` - // - // If the curren position at the start of this code is at a whitespace token, - // the "interesting" token (returned by `next`) comes later. - // So in the second call to `next`, we don’t want "uninteresting" tokens - // to overwrite the cache. - Token::WhiteSpace(_) | Token::Comment(_) => {} - _ => { - self.input.cached_token = Some(CachedToken { - token: token.clone(), - start_position: token_start_position, - end_position: self.input.tokenizer.position(), - }) - } + + // Only overwrite an existing cached token if the new one is further in the stream. + // A typical pattern is: + // + // ``` + // parser.try(|parser| { + // match parser.next() { … } + // }).or_else(|| { + // match parser.next() { … } + // }) + // ``` + // + // If the current position at the start of this code is at a whitespace token, + // `next` is going to skip it and go to the next token. + // If `try` then rewinds, the second token is the one that will be in the cache. + // To be able to get any cache hit in this case, + // we don’t want to overwrite the cache with the intermediate whitespace token. + if self.input.cached_token.as_ref().map_or(true, |c| c.start_position < token_start_position) { + self.input.cached_token = Some(CachedToken { + token: token.clone(), + start_position: token_start_position, + end_position: self.input.tokenizer.position(), + }) } } } From a79e53672a5fc146135091217e68e1155c08dbea Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 18 Jul 2017 19:21:05 +0200 Subject: [PATCH 10/15] Make Parser::next return &Token instead of Token --- src/color.rs | 42 ++++---- src/nth.rs | 63 +++++++----- src/parser.rs | 188 ++++++++++++++++------------------ src/rules_and_declarations.rs | 104 ++++++++++--------- src/tests.rs | 48 ++++----- src/unicode_range.rs | 14 +-- 6 files changed, 233 insertions(+), 226 deletions(-) diff --git a/src/color.rs b/src/color.rs index e56d6f7f..6dbc2142 100644 --- a/src/color.rs +++ b/src/color.rs @@ -141,7 +141,7 @@ impl Color { /// /// FIXME(#2) Deprecated CSS2 System Colors are not supported yet. pub fn parse<'i, 't>(input: &mut Parser<'i, 't>) -> Result> { - let token = input.next()?; + let token = input.next()?.clone(); match token { Token::Hash(ref value) | Token::IDHash(ref value) => { Color::parse_hash(value.as_bytes()) @@ -154,7 +154,7 @@ impl Color { }).map_err(ParseError::<()>::basic); } _ => Err(()) - }.map_err(|()| BasicParseError::UnexpectedToken(token)) + }.map_err(|()| BasicParseError::UnexpectedToken(token.clone())) } /// Parse a color hash, without the leading '#' character. @@ -422,21 +422,17 @@ fn parse_color_function<'i, 't>(name: &str, arguments: &mut Parser<'i, 't>) -> R if uses_commas { arguments.expect_comma()?; } else { - match arguments.next()? { - Token::Delim('/') => {}, - t => return Err(BasicParseError::UnexpectedToken(t)), - }; + arguments.expect_delim('/')?; }; - let token = arguments.next()?; - match token { + match *arguments.next()? { Token::Number { value: v, .. } => { clamp_unit_f32(v) } Token::Percentage { unit_value: v, .. } => { clamp_unit_f32(v) } - t => { - return Err(BasicParseError::UnexpectedToken(t)) + ref t => { + return Err(BasicParseError::UnexpectedToken(t.clone())) } } } else { @@ -457,10 +453,10 @@ fn parse_rgb_components_rgb<'i, 't>(arguments: &mut Parser<'i, 't>) -> Result<(u // Either integers or percentages, but all the same type. 
// https://drafts.csswg.org/css-color/#rgb-functions - match arguments.next()? { + match arguments.next()?.clone() { Token::Number { value: v, .. } => { red = clamp_floor_256_f32(v); - green = clamp_floor_256_f32(match arguments.next()? { + green = clamp_floor_256_f32(match arguments.next()?.clone() { Token::Number { value: v, .. } => v, Token::Comma => { uses_commas = true; @@ -475,7 +471,7 @@ fn parse_rgb_components_rgb<'i, 't>(arguments: &mut Parser<'i, 't>) -> Result<(u } Token::Percentage { unit_value, .. } => { red = clamp_unit_f32(unit_value); - green = clamp_unit_f32(match arguments.next()? { + green = clamp_unit_f32(match arguments.next()?.clone() { Token::Percentage { unit_value, .. } => unit_value, Token::Comma => { uses_commas = true; @@ -498,28 +494,26 @@ fn parse_rgb_components_hsl<'i, 't>(arguments: &mut Parser<'i, 't>) -> Result<(u let mut uses_commas = false; // Hue given as an angle // https://drafts.csswg.org/css-values/#angles - let token = arguments.next()?; - let hue_degrees = match token { - Token::Number { value: v, .. } => Ok(v), + let hue_degrees = match *arguments.next()? { + Token::Number { value: v, .. } => v, Token::Dimension { value: v, ref unit, .. } => { match_ignore_ascii_case! { &*unit, - "deg" => Ok(v), - "grad" => Ok(v * 360. / 400.), - "rad" => Ok(v * 360. / (2. * PI)), - "turn" => Ok(v * 360.), - _ => Err(()), + "deg" => v, + "grad" => v * 360. / 400., + "rad" => v * 360. / (2. * PI), + "turn" => v * 360., + _ => return Err(BasicParseError::UnexpectedToken(Token::Ident(unit.clone()))), } } - t => return Err(BasicParseError::UnexpectedToken(t)) + ref t => return Err(BasicParseError::UnexpectedToken(t.clone())) }; - let hue_degrees = hue_degrees.map_err(|()| BasicParseError::UnexpectedToken(token))?; // Subtract an integer before rounding, to avoid some rounding errors: let hue_normalized_degrees = hue_degrees - 360. * (hue_degrees / 360.).floor(); let hue = hue_normalized_degrees / 360.; // Saturation and lightness are clamped to 0% ... 100% // https://drafts.csswg.org/css-color/#the-hsl-notation - let saturation = match arguments.next()? { + let saturation = match arguments.next()?.clone() { Token::Percentage { unit_value, .. } => unit_value, Token::Comma => { uses_commas = true; diff --git a/src/nth.rs b/src/nth.rs index 94e7efbd..7c111f53 100644 --- a/src/nth.rs +++ b/src/nth.rs @@ -12,22 +12,23 @@ use super::{Token, Parser, BasicParseError}; /// in which case the caller needs to check if the arguments’ parser is exhausted. /// Return `Ok((A, B))`, or `Err(())` for a syntax error. pub fn parse_nth<'i, 't>(input: &mut Parser<'i, 't>) -> Result<(i32, i32), BasicParseError<'i>> { - let token = input.next()?; - match token { + // FIXME: remove .clone() when lifetimes are non-lexical. + match input.next()?.clone() { Token::Number { int_value: Some(b), .. } => { Ok((0, b)) } - Token::Dimension { int_value: Some(a), ref unit, .. } => { + Token::Dimension { int_value: Some(a), unit, .. } => { match_ignore_ascii_case! { &unit, "n" => Ok(try!(parse_b(input, a))), "n-" => Ok(try!(parse_signless_b(input, a, -1))), - _ => { - parse_n_dash_digits(&*unit).map(|val| (a, val)) + _ => match parse_n_dash_digits(&*unit) { + Ok(b) => Ok((a, b)), + Err(()) => Err(BasicParseError::UnexpectedToken(Token::Ident(unit.clone()))) } } } - Token::Ident(ref value) => { + Token::Ident(value) => { match_ignore_ascii_case! 
{ &value, "even" => Ok((2, 0)), "odd" => Ok((2, 1)), @@ -35,48 +36,56 @@ pub fn parse_nth<'i, 't>(input: &mut Parser<'i, 't>) -> Result<(i32, i32), Basic "-n" => Ok(try!(parse_b(input, -1))), "n-" => Ok(try!(parse_signless_b(input, 1, -1))), "-n-" => Ok(try!(parse_signless_b(input, -1, -1))), - _ => if value.starts_with("-") { - parse_n_dash_digits(&value[1..]).map(|v| (-1, v)) - } else { - parse_n_dash_digits(&*value).map(|v| (1, v)) + _ => { + let (slice, a) = if value.starts_with("-") { + (&value[1..], -1) + } else { + (&*value, 1) + }; + match parse_n_dash_digits(slice) { + Ok(b) => Ok((a, b)), + Err(()) => Err(BasicParseError::UnexpectedToken(Token::Ident(value.clone()))) + } } } } - Token::Delim('+') => match input.next_including_whitespace()? { + // FIXME: remove .clone() when lifetimes are non-lexical. + Token::Delim('+') => match input.next_including_whitespace()?.clone() { Token::Ident(value) => { match_ignore_ascii_case! { &value, - "n" => Ok(try!(parse_b(input, 1))), - "n-" => Ok(try!(parse_signless_b(input, 1, -1))), - _ => parse_n_dash_digits(&*value).map(|v| (1, v)) + "n" => parse_b(input, 1), + "n-" => parse_signless_b(input, 1, -1), + _ => match parse_n_dash_digits(&*value) { + Ok(b) => Ok((1, b)), + Err(()) => Err(BasicParseError::UnexpectedToken(Token::Ident(value.clone()))) + } } } - t => return Err(BasicParseError::UnexpectedToken(t)), + token => Err(BasicParseError::UnexpectedToken(token)), }, - _ => Err(()), - }.map_err(|()| BasicParseError::UnexpectedToken(token)) + token => Err(BasicParseError::UnexpectedToken(token)), + } } fn parse_b<'i, 't>(input: &mut Parser<'i, 't>, a: i32) -> Result<(i32, i32), BasicParseError<'i>> { let start_position = input.position(); - let token = input.next(); - match token { - Ok(Token::Delim('+')) => Ok(parse_signless_b(input, a, 1)?), - Ok(Token::Delim('-')) => Ok(parse_signless_b(input, a, -1)?), - Ok(Token::Number { has_sign: true, int_value: Some(b), .. }) => Ok((a, b)), + match input.next() { + Ok(&Token::Delim('+')) => parse_signless_b(input, a, 1), + Ok(&Token::Delim('-')) => parse_signless_b(input, a, -1), + Ok(&Token::Number { has_sign: true, int_value: Some(b), .. }) => Ok((a, b)), _ => { input.reset(start_position); Ok((a, 0)) } - }.map_err(|()| BasicParseError::UnexpectedToken(token.unwrap())) + } } fn parse_signless_b<'i, 't>(input: &mut Parser<'i, 't>, a: i32, b_sign: i32) -> Result<(i32, i32), BasicParseError<'i>> { - let token = input.next()?; - match token { + match *input.next()? { Token::Number { has_sign: false, int_value: Some(b), .. 
} => Ok((a, b_sign * b)), - _ => Err(()) - }.map_err(|()| BasicParseError::UnexpectedToken(token)) + ref token => Err(BasicParseError::UnexpectedToken(token.clone())) + } } fn parse_n_dash_digits(string: &str) -> Result { diff --git a/src/parser.rs b/src/parser.rs index 38c944af..8aedb1be 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -80,6 +80,11 @@ impl<'i> ParserInput<'i> { cached_token: None, } } + + #[inline] + fn cached_token_ref(&self) -> &Token<'i> { + &self.cached_token.as_ref().unwrap().token + } } /// A CSS parser that borrows its `&str` input, @@ -223,7 +228,7 @@ impl<'i: 't, 't> Parser<'i, 't> { let result = match self.next() { Err(BasicParseError::EndOfInput) => Ok(()), Err(e) => unreachable!("Unexpected error encountered: {:?}", e), - Ok(t) => Err(BasicParseError::UnexpectedToken(t)), + Ok(t) => Err(BasicParseError::UnexpectedToken(t.clone())), }; self.reset(start_position); result @@ -327,23 +332,27 @@ impl<'i: 't, 't> Parser<'i, 't> { /// See the `Parser::parse_nested_block` method to parse the content of functions or blocks. /// /// This only returns a closing token when it is unmatched (and therefore an error). - pub fn next(&mut self) -> Result, BasicParseError<'i>> { + pub fn next(&mut self) -> Result<&Token<'i>, BasicParseError<'i>> { loop { match self.next_including_whitespace_and_comments() { - Ok(Token::WhiteSpace(_)) | Ok(Token::Comment(_)) => {}, - result => return result + Err(e) => return Err(e), + Ok(&Token::WhiteSpace(_)) | Ok(&Token::Comment(_)) => {}, + _ => break } } + Ok(self.input.cached_token_ref()) } /// Same as `Parser::next`, but does not skip whitespace tokens. - pub fn next_including_whitespace(&mut self) -> Result, BasicParseError<'i>> { + pub fn next_including_whitespace(&mut self) -> Result<&Token<'i>, BasicParseError<'i>> { loop { match self.next_including_whitespace_and_comments() { - Ok(Token::Comment(_)) => {}, - result => return result + Err(e) => return Err(e), + Ok(&Token::Comment(_)) => {}, + _ => break } } + Ok(self.input.cached_token_ref()) } /// Same as `Parser::next`, but does not skip whitespace or comment tokens. @@ -352,7 +361,7 @@ impl<'i: 't, 't> Parser<'i, 't> { /// where comments are preserved. /// When parsing higher-level values, per the CSS Syntax specification, /// comments should always be ignored between tokens. - pub fn next_including_whitespace_and_comments(&mut self) -> Result, BasicParseError<'i>> { + pub fn next_including_whitespace_and_comments(&mut self) -> Result<&Token<'i>, BasicParseError<'i>> { if let Some(block_type) = self.at_start_of.take() { consume_until_end_of_block(block_type, &mut self.input.tokenizer); } @@ -367,38 +376,20 @@ impl<'i: 't, 't> Parser<'i, 't> { match self.input.cached_token { Some(ref cached_token) if cached_token.start_position == token_start_position => { self.input.tokenizer.reset(cached_token.end_position); - token = cached_token.token.clone(); + token = &cached_token.token } _ => { - token = self.input.tokenizer.next().map_err(|()| BasicParseError::EndOfInput)?; - - // Only overwrite an existing cached token if the new one is further in the stream. - // A typical pattern is: - // - // ``` - // parser.try(|parser| { - // match parser.next() { … } - // }).or_else(|| { - // match parser.next() { … } - // }) - // ``` - // - // If the current position at the start of this code is at a whitespace token, - // `next` is going to skip it and go to the next token. - // If `try` then rewinds, the second token is the one that will be in the cache. 
-                // To be able to get any cache hit in this case,
-                // we don’t want to overwrite the cache with the intermediate whitespace token.
-                if self.input.cached_token.as_ref().map_or(true, |c| c.start_position < token_start_position) {
-                    self.input.cached_token = Some(CachedToken {
-                        token: token.clone(),
-                        start_position: token_start_position,
-                        end_position: self.input.tokenizer.position(),
-                    })
-                }
+                let new_token = self.input.tokenizer.next().map_err(|()| BasicParseError::EndOfInput)?;
+                self.input.cached_token = Some(CachedToken {
+                    token: new_token,
+                    start_position: token_start_position,
+                    end_position: self.input.tokenizer.position(),
+                });
+                token = self.input.cached_token_ref()
             }
         }
 
-        if let Some(block_type) = BlockType::opening(&token) {
+        if let Some(block_type) = BlockType::opening(token) {
             self.at_start_of = Some(block_type);
         }
         Ok(token)
@@ -434,7 +425,7 @@ impl<'i: 't, 't> Parser<'i, 't> {
             values.push(self.parse_until_before(Delimiter::Comma, &mut parse_one)?);
             match self.next() {
                 Err(_) => return Ok(values),
-                Ok(Token::Comma) => continue,
+                Ok(&Token::Comma) => continue,
                 Ok(_) => unreachable!(),
             }
         }
@@ -487,94 +478,93 @@ impl<'i: 't, 't> Parser<'i, 't> {
     /// Parse a <whitespace-token> and return its value.
     #[inline]
    pub fn expect_whitespace(&mut self) -> Result<&'i str, BasicParseError<'i>> {
-        match self.next_including_whitespace()? {
+        match *self.next_including_whitespace()? {
             Token::WhiteSpace(value) => Ok(value),
-            t => Err(BasicParseError::UnexpectedToken(t))
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
     /// Parse a <ident-token> and return the unescaped value.
     #[inline]
     pub fn expect_ident(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> {
-        match self.next()? {
-            Token::Ident(value) => Ok(value),
-            t => Err(BasicParseError::UnexpectedToken(t))
+        match *self.next()? {
+            Token::Ident(ref value) => Ok(value.clone()),
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
     /// Parse a <ident-token> whose unescaped value is an ASCII-insensitive match for the given value.
     #[inline]
     pub fn expect_ident_matching(&mut self, expected_value: &str) -> Result<(), BasicParseError<'i>> {
-        match self.next()? {
+        match *self.next()? {
             Token::Ident(ref value) if value.eq_ignore_ascii_case(expected_value) => Ok(()),
-            t => Err(BasicParseError::UnexpectedToken(t))
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
     /// Parse a <string-token> and return the unescaped value.
     #[inline]
     pub fn expect_string(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> {
-        match self.next()? {
-            Token::QuotedString(value) => Ok(value),
-            t => Err(BasicParseError::UnexpectedToken(t))
+        match *self.next()? {
+            Token::QuotedString(ref value) => Ok(value.clone()),
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
     /// Parse either a <ident-token> or a <string-token>, and return the unescaped value.
     #[inline]
     pub fn expect_ident_or_string(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> {
-        match self.next()? {
-            Token::Ident(value) => Ok(value),
-            Token::QuotedString(value) => Ok(value),
-            t => Err(BasicParseError::UnexpectedToken(t))
+        match *self.next()? {
+            Token::Ident(ref value) => Ok(value.clone()),
+            Token::QuotedString(ref value) => Ok(value.clone()),
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
     /// Parse a <url-token> and return the unescaped value.
     #[inline]
     pub fn expect_url(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> {
-        match self.next()? {
-            Token::UnquotedUrl(value) => Ok(value),
-            Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
-                self.parse_nested_block(|input| input.expect_string().map_err(ParseError::Basic))
-                    .map_err(ParseError::<()>::basic)
-            },
-            t => Err(BasicParseError::UnexpectedToken(t))
+        // FIXME: revert early returns when lifetimes are non-lexical
+        match *self.next()? {
+            Token::UnquotedUrl(ref value) => return Ok(value.clone()),
+            Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {}
+            ref t => return Err(BasicParseError::UnexpectedToken(t.clone()))
         }
+        self.parse_nested_block(|input| input.expect_string().map_err(ParseError::Basic))
+            .map_err(ParseError::<()>::basic)
     }
 
     /// Parse either a <url-token> or a <string-token>, and return the unescaped value.
     #[inline]
     pub fn expect_url_or_string(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> {
-        match self.next()? {
-            Token::UnquotedUrl(value) => Ok(value),
-            Token::QuotedString(value) => Ok(value),
-            Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
-                self.parse_nested_block(|input| input.expect_string().map_err(ParseError::Basic))
-                    .map_err(ParseError::<()>::basic)
-            },
-            t => Err(BasicParseError::UnexpectedToken(t))
+        // FIXME: revert early returns when lifetimes are non-lexical
+        match *self.next()? {
+            Token::UnquotedUrl(ref value) => return Ok(value.clone()),
+            Token::QuotedString(ref value) => return Ok(value.clone()),
+            Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {}
+            ref t => return Err(BasicParseError::UnexpectedToken(t.clone()))
         }
+        self.parse_nested_block(|input| input.expect_string().map_err(ParseError::Basic))
+            .map_err(ParseError::<()>::basic)
     }
 
     /// Parse a <number-token> and return the integer value.
     #[inline]
     pub fn expect_number(&mut self) -> Result<f32, BasicParseError<'i>> {
-        match self.next()? {
+        match *self.next()? {
             Token::Number { value, .. } => Ok(value),
-            t => Err(BasicParseError::UnexpectedToken(t))
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
     /// Parse a <number-token> that does not have a fractional part, and return the integer value.
     #[inline]
     pub fn expect_integer(&mut self) -> Result<i32, BasicParseError<'i>> {
-        let token = self.next()?;
-        match token {
+        match *self.next()? {
             Token::Number { int_value: Some(int_value), .. } => {
                 Ok(int_value)
             }
-            t => Err(BasicParseError::UnexpectedToken(t))
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
@@ -582,45 +572,45 @@ impl<'i: 't, 't> Parser<'i, 't> {
     /// `0%` and `100%` map to `0.0` and `1.0` (not `100.0`), respectively.
     #[inline]
     pub fn expect_percentage(&mut self) -> Result<f32, BasicParseError<'i>> {
-        match self.next()? {
+        match *self.next()? {
             Token::Percentage { unit_value, .. } => Ok(unit_value),
-            t => Err(BasicParseError::UnexpectedToken(t))
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
     /// Parse a `:` <colon-token>.
     #[inline]
     pub fn expect_colon(&mut self) -> Result<(), BasicParseError<'i>> {
-        match self.next()? {
+        match *self.next()? {
             Token::Colon => Ok(()),
-            t => Err(BasicParseError::UnexpectedToken(t))
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
     /// Parse a `;` <semicolon-token>.
     #[inline]
     pub fn expect_semicolon(&mut self) -> Result<(), BasicParseError<'i>> {
-        match self.next()? {
+        match *self.next()? {
             Token::Semicolon => Ok(()),
-            t => Err(BasicParseError::UnexpectedToken(t))
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
     /// Parse a `,` <comma-token>.
     #[inline]
     pub fn expect_comma(&mut self) -> Result<(), BasicParseError<'i>> {
-        match self.next()? {
+        match *self.next()? {
             Token::Comma => Ok(()),
-            t => Err(BasicParseError::UnexpectedToken(t))
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
     /// Parse a <delim-token> with the given value.
     #[inline]
     pub fn expect_delim(&mut self, expected_value: char) -> Result<(), BasicParseError<'i>> {
-        match self.next()? {
+        match *self.next()? {
             Token::Delim(value) if value == expected_value => Ok(()),
-            t => Err(BasicParseError::UnexpectedToken(t))
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
@@ -629,9 +619,9 @@ impl<'i: 't, 't> Parser<'i, 't> {
     /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method.
     #[inline]
     pub fn expect_curly_bracket_block(&mut self) -> Result<(), BasicParseError<'i>> {
-        match self.next()? {
+        match *self.next()? {
             Token::CurlyBracketBlock => Ok(()),
-            t => Err(BasicParseError::UnexpectedToken(t))
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
@@ -640,9 +630,9 @@ impl<'i: 't, 't> Parser<'i, 't> {
     /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method.
     #[inline]
     pub fn expect_square_bracket_block(&mut self) -> Result<(), BasicParseError<'i>> {
-        match self.next()? {
+        match *self.next()? {
             Token::SquareBracketBlock => Ok(()),
-            t => Err(BasicParseError::UnexpectedToken(t))
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
@@ -651,9 +641,9 @@ impl<'i: 't, 't> Parser<'i, 't> {
     /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method.
     #[inline]
     pub fn expect_parenthesis_block(&mut self) -> Result<(), BasicParseError<'i>> {
-        match self.next()? {
+        match *self.next()? {
             Token::ParenthesisBlock => Ok(()),
-            t => Err(BasicParseError::UnexpectedToken(t))
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
@@ -662,9 +652,9 @@ impl<'i: 't, 't> Parser<'i, 't> {
     /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method.
     #[inline]
     pub fn expect_function(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> {
-        match self.next()? {
-            Token::Function(name) => Ok(name),
-            t => Err(BasicParseError::UnexpectedToken(t))
+        match *self.next()? {
+            Token::Function(ref name) => Ok(name.clone()),
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
@@ -673,9 +663,9 @@ impl<'i: 't, 't> Parser<'i, 't> {
     /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method.
     #[inline]
     pub fn expect_function_matching(&mut self, expected_name: &str) -> Result<(), BasicParseError<'i>> {
-        match self.next()? {
+        match *self.next()? {
             Token::Function(ref name) if name.eq_ignore_ascii_case(expected_name) => Ok(()),
-            t => Err(BasicParseError::UnexpectedToken(t))
+            ref t => Err(BasicParseError::UnexpectedToken(t.clone()))
         }
     }
 
@@ -686,20 +676,22 @@ impl<'i: 't, 't> Parser<'i, 't> {
     pub fn expect_no_error_token(&mut self) -> Result<(), BasicParseError<'i>> {
         loop {
             match self.next_including_whitespace_and_comments() {
-                Ok(Token::Function(_)) | Ok(Token::ParenthesisBlock) |
-                Ok(Token::SquareBracketBlock) | Ok(Token::CurlyBracketBlock) => {
-                    let result = self.parse_nested_block(|input| input.expect_no_error_token()
-                                                         .map_err(|e| ParseError::Basic(e)));
-                    result.map_err(ParseError::<()>::basic)?
-                }
+                Ok(&Token::Function(_)) |
+                Ok(&Token::ParenthesisBlock) |
+                Ok(&Token::SquareBracketBlock) |
+                Ok(&Token::CurlyBracketBlock) => {}
                 Ok(token) => {
                     if token.is_parse_error() {
                         //FIXME: maybe these should be separate variants of BasicParseError instead?
- return Err(BasicParseError::UnexpectedToken(token)) + return Err(BasicParseError::UnexpectedToken(token.clone())) } + continue } Err(_) => return Ok(()) } + let result = self.parse_nested_block(|input| input.expect_no_error_token() + .map_err(|e| ParseError::Basic(e))); + result.map_err(ParseError::<()>::basic)? } } } diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index 24af7441..f40b1fcb 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -239,9 +239,17 @@ where P: DeclarationParser<'i, Declaration = I, Error = E> + fn next(&mut self) -> Option>> { loop { let start_position = self.input.position(); - match self.input.next_including_whitespace_and_comments() { - Ok(Token::WhiteSpace(_)) | Ok(Token::Comment(_)) | Ok(Token::Semicolon) => {} - Ok(Token::Ident(name)) => { + // FIXME: remove intermediate variable when lifetimes are non-lexical + let ident = match self.input.next_including_whitespace_and_comments() { + Ok(&Token::WhiteSpace(_)) | Ok(&Token::Comment(_)) | Ok(&Token::Semicolon) => continue, + Ok(&Token::Ident(ref name)) => Ok(Ok(name.clone())), + Ok(&Token::AtKeyword(ref name)) => Ok(Err(name.clone())), + Ok(token) => Err(token.clone()), + Err(_) => return None, + }; + match ident { + Ok(Ok(name)) => { + // Ident return Some({ let parser = &mut self.parser; // FIXME: https://github.com/rust-lang/rust/issues/42508 @@ -254,18 +262,18 @@ where P: DeclarationParser<'i, Declaration = I, Error = E> + span: start_position..self.input.position() })) } - Ok(Token::AtKeyword(name)) => { + Ok(Err(name)) => { + // At-keyword return Some(parse_at_rule(start_position, name, self.input, &mut self.parser)) } - Ok(t) => { + Err(token) => { return Some(self.input.parse_until_after(Delimiter::Semicolon, - |_| Err(ParseError::Basic(BasicParseError::UnexpectedToken(t)))) + |_| Err(ParseError::Basic(BasicParseError::UnexpectedToken(token.clone())))) .map_err(|e| PreciseParseError { error: e, span: start_position..self.input.position() })) } - Err(_) => return None, } } } @@ -334,29 +342,31 @@ where P: QualifiedRuleParser<'i, QualifiedRule = R, Error = E> + fn next(&mut self) -> Option>> { loop { let start_position = self.input.position(); - match self.input.next_including_whitespace_and_comments() { - Ok(Token::WhiteSpace(_)) | Ok(Token::Comment(_)) => {} - Ok(Token::CDO) | Ok(Token::CDC) if self.is_stylesheet => {} - Ok(Token::AtKeyword(name)) => { - let first_stylesheet_rule = self.is_stylesheet && !self.any_rule_so_far; - self.any_rule_so_far = true; - if first_stylesheet_rule && name.eq_ignore_ascii_case("charset") { - let delimiters = Delimiter::Semicolon | Delimiter::CurlyBracketBlock; - let _: Result<(), ParseError<()>> = self.input.parse_until_after(delimiters, |_| Ok(())); - } else { - return Some(parse_at_rule(start_position, name, self.input, &mut self.parser)) - } - } - Ok(_) => { - self.any_rule_so_far = true; - self.input.reset(start_position); - return Some(parse_qualified_rule(self.input, &mut self.parser) - .map_err(|e| PreciseParseError { - error: e, - span: start_position..self.input.position() - })) - } + // FIXME: remove intermediate variable when lifetimes are non-lexical + let at_keyword = match self.input.next_including_whitespace_and_comments() { + Ok(&Token::WhiteSpace(_)) | Ok(&Token::Comment(_)) => continue, + Ok(&Token::CDO) | Ok(&Token::CDC) if self.is_stylesheet => continue, + Ok(&Token::AtKeyword(ref name)) => Some(name.clone()), + Ok(_) => None, Err(_) => return None, + }; + if let Some(name) = at_keyword { + 
let first_stylesheet_rule = self.is_stylesheet && !self.any_rule_so_far; + self.any_rule_so_far = true; + if first_stylesheet_rule && name.eq_ignore_ascii_case("charset") { + let delimiters = Delimiter::Semicolon | Delimiter::CurlyBracketBlock; + let _: Result<(), ParseError<()>> = self.input.parse_until_after(delimiters, |_| Ok(())); + } else { + return Some(parse_at_rule(start_position, name.clone(), self.input, &mut self.parser)) + } + } else { + self.any_rule_so_far = true; + self.input.reset(start_position); + return Some(parse_qualified_rule(self.input, &mut self.parser) + .map_err(|e| PreciseParseError { + error: e, + span: start_position..self.input.position() + })) } } } @@ -388,15 +398,17 @@ where P: QualifiedRuleParser<'i, QualifiedRule = R, Error = E> + input.parse_entirely(|input| { loop { let start_position = input.position(); - match input.next_including_whitespace_and_comments()? { - Token::WhiteSpace(_) | Token::Comment(_) => {} - Token::AtKeyword(name) => { - return parse_at_rule(start_position, name, input, parser).map_err(|e| e.error) - } - _ => { - input.reset(start_position); - return parse_qualified_rule(input, parser) - } + // FIXME: remove intermediate variable when lifetimes are non-lexical + let at_keyword = match *input.next_including_whitespace_and_comments()? { + Token::WhiteSpace(_) | Token::Comment(_) => continue, + Token::AtKeyword(ref name) => Some(name.clone()), + _ => None + }; + if let Some(name) = at_keyword { + return parse_at_rule(start_position, name, input, parser).map_err(|e| e.error) + } else { + input.reset(start_position); + return parse_qualified_rule(input, parser) } } }) @@ -419,8 +431,8 @@ fn parse_at_rule<'i: 't, 't, P, E>(start_position: SourcePosition, name: CowRcSt match result { Ok(AtRuleType::WithoutBlock(rule)) => { match input.next() { - Ok(Token::Semicolon) | Err(_) => Ok(rule), - Ok(Token::CurlyBracketBlock) => Err(PreciseParseError { + Ok(&Token::Semicolon) | Err(_) => Ok(rule), + Ok(&Token::CurlyBracketBlock) => Err(PreciseParseError { error: ParseError::Basic(BasicParseError::UnexpectedToken(Token::CurlyBracketBlock)), span: start_position..input.position(), }), @@ -429,7 +441,7 @@ fn parse_at_rule<'i: 't, 't, P, E>(start_position: SourcePosition, name: CowRcSt } Ok(AtRuleType::WithBlock(prelude)) => { match input.next() { - Ok(Token::CurlyBracketBlock) => { + Ok(&Token::CurlyBracketBlock) => { // FIXME: https://github.com/rust-lang/rust/issues/42508 parse_nested_block::<'i, 't, _, _, _>(input, move |input| parser.parse_block(prelude, input)) .map_err(|e| PreciseParseError { @@ -437,7 +449,7 @@ fn parse_at_rule<'i: 't, 't, P, E>(start_position: SourcePosition, name: CowRcSt span: start_position..input.position(), }) } - Ok(Token::Semicolon) => Err(PreciseParseError { + Ok(&Token::Semicolon) => Err(PreciseParseError { error: ParseError::Basic(BasicParseError::UnexpectedToken(Token::Semicolon)), span: start_position..input.position() }), @@ -450,8 +462,8 @@ fn parse_at_rule<'i: 't, 't, P, E>(start_position: SourcePosition, name: CowRcSt } Ok(AtRuleType::OptionalBlock(prelude)) => { match input.next() { - Ok(Token::Semicolon) | Err(_) => Ok(parser.rule_without_block(prelude)), - Ok(Token::CurlyBracketBlock) => { + Ok(&Token::Semicolon) | Err(_) => Ok(parser.rule_without_block(prelude)), + Ok(&Token::CurlyBracketBlock) => { // FIXME: https://github.com/rust-lang/rust/issues/42508 parse_nested_block::<'i, 't, _, _, _>(input, move |input| parser.parse_block(prelude, input)) .map_err(|e| PreciseParseError { @@ -465,7 +477,7 @@ fn 
parse_at_rule<'i: 't, 't, P, E>(start_position: SourcePosition, name: CowRcSt Err(error) => { let end_position = input.position(); match input.next() { - Ok(Token::CurlyBracketBlock) | Ok(Token::Semicolon) | Err(_) => {}, + Ok(&Token::CurlyBracketBlock) | Ok(&Token::Semicolon) | Err(_) => {}, _ => unreachable!() }; Err(PreciseParseError { @@ -484,7 +496,7 @@ fn parse_qualified_rule<'i, 't, P, E>(input: &mut Parser<'i, 't>, parser: &mut P let prelude = parse_until_before::<'i, 't, _, _, _>(input, Delimiter::CurlyBracketBlock, |input| { parser.parse_prelude(input) }); - match input.next()? { + match *input.next()? { Token::CurlyBracketBlock => { // Do this here so that we consume the `{` even if the prelude is `Err`. let prelude = prelude?; diff --git a/src/tests.rs b/src/tests.rs index e3214238..ebe71192 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -118,7 +118,7 @@ fn component_value_list() { fn one_component_value() { run_json_tests(include_str!("css-parsing-tests/one_component_value.json"), |input| { let result: Result> = input.parse_entirely(|input| { - Ok(one_component_value_to_json(input.next()?, input)) + Ok(one_component_value_to_json(input.next()?.clone(), input)) }); result.unwrap_or(JArray!["error", "invalid"]) }); @@ -289,7 +289,7 @@ fn unquoted_url_escaping() { )\ "); let mut input = ParserInput::new(&serialized); - assert_eq!(Parser::new(&mut input).next(), Ok(token)); + assert_eq!(Parser::new(&mut input).next(), Ok(&token)); } #[test] @@ -389,9 +389,9 @@ fn serializer(preserve_comments: bool) { string: &mut String, preserve_comments: bool) { while let Ok(token) = if preserve_comments { - input.next_including_whitespace_and_comments() + input.next_including_whitespace_and_comments().map(|t| t.clone()) } else { - input.next_including_whitespace() + input.next_including_whitespace().map(|t| t.clone()) } { let token_type = token.serialization_type(); if !preserve_comments && previous_token.needs_separator_when_before(token_type) { @@ -452,24 +452,24 @@ fn line_numbers() { let mut input = ParserInput::new("foo bar\nbaz\r\n\n\"a\\\r\nb\""); let mut input = Parser::new(&mut input); assert_eq!(input.current_source_location(), SourceLocation { line: 0, column: 0 }); - assert_eq!(input.next_including_whitespace(), Ok(Token::Ident("foo".into()))); + assert_eq!(input.next_including_whitespace(), Ok(&Token::Ident("foo".into()))); assert_eq!(input.current_source_location(), SourceLocation { line: 0, column: 3 }); - assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace(" "))); + assert_eq!(input.next_including_whitespace(), Ok(&Token::WhiteSpace(" "))); assert_eq!(input.current_source_location(), SourceLocation { line: 0, column: 4 }); - assert_eq!(input.next_including_whitespace(), Ok(Token::Ident("bar".into()))); + assert_eq!(input.next_including_whitespace(), Ok(&Token::Ident("bar".into()))); assert_eq!(input.current_source_location(), SourceLocation { line: 0, column: 7 }); - assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace("\n"))); + assert_eq!(input.next_including_whitespace(), Ok(&Token::WhiteSpace("\n"))); assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 0 }); - assert_eq!(input.next_including_whitespace(), Ok(Token::Ident("baz".into()))); + assert_eq!(input.next_including_whitespace(), Ok(&Token::Ident("baz".into()))); assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 3 }); let position = input.position(); - assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace("\r\n\n"))); + 
assert_eq!(input.next_including_whitespace(), Ok(&Token::WhiteSpace("\r\n\n"))); assert_eq!(input.current_source_location(), SourceLocation { line: 3, column: 0 }); assert_eq!(input.source_location(position), SourceLocation { line: 1, column: 3 }); - assert_eq!(input.next_including_whitespace(), Ok(Token::QuotedString("ab".into()))); + assert_eq!(input.next_including_whitespace(), Ok(&Token::QuotedString("ab".into()))); assert_eq!(input.current_source_location(), SourceLocation { line: 4, column: 2 }); assert!(input.next_including_whitespace().is_err()); } @@ -535,12 +535,12 @@ fn overflow() { fn line_delimited() { let mut input = ParserInput::new(" { foo ; bar } baz;,"); let mut input = Parser::new(&mut input); - assert_eq!(input.next(), Ok(Token::CurlyBracketBlock)); + assert_eq!(input.next(), Ok(&Token::CurlyBracketBlock)); assert!({ let result: Result<_, ParseError<()>> = input.parse_until_after(Delimiter::Semicolon, |_| Ok(42)); result }.is_err()); - assert_eq!(input.next(), Ok(Token::Comma)); + assert_eq!(input.next(), Ok(&Token::Comma)); assert!(input.next().is_err()); } @@ -684,7 +684,7 @@ impl<'i> DeclarationParser<'i> for JsonParser { let mut important = false; loop { let start_position = input.position(); - if let Ok(mut token) = input.next_including_whitespace() { + if let Ok(mut token) = input.next_including_whitespace().map(|t| t.clone()) { // Hack to deal with css-parsing-tests assuming that // `!important` in the middle of a declaration value is OK. // This can never happen per spec @@ -698,7 +698,7 @@ impl<'i> DeclarationParser<'i> for JsonParser { } } input.reset(start_position); - token = input.next_including_whitespace().unwrap(); + token = input.next_including_whitespace().unwrap().clone(); } value.push(one_component_value_to_json(token, input)); } else { @@ -761,7 +761,7 @@ impl<'i> QualifiedRuleParser<'i> for JsonParser { fn component_values_to_json(input: &mut Parser) -> Vec { let mut values = vec![]; - while let Ok(token) = input.next_including_whitespace() { + while let Ok(token) = input.next_including_whitespace().map(|t| t.clone()) { values.push(one_component_value_to_json(token, input)); } values @@ -926,17 +926,17 @@ fn parser_maintains_current_line() { let mut input = ParserInput::new("ident ident;\nident ident ident;\nident"); let mut parser = Parser::new(&mut input); assert_eq!(parser.current_line(), "ident ident;"); - assert_eq!(parser.next(), Ok(Token::Ident("ident".into()))); - assert_eq!(parser.next(), Ok(Token::Ident("ident".into()))); - assert_eq!(parser.next(), Ok(Token::Semicolon)); + assert_eq!(parser.next(), Ok(&Token::Ident("ident".into()))); + assert_eq!(parser.next(), Ok(&Token::Ident("ident".into()))); + assert_eq!(parser.next(), Ok(&Token::Semicolon)); - assert_eq!(parser.next(), Ok(Token::Ident("ident".into()))); + assert_eq!(parser.next(), Ok(&Token::Ident("ident".into()))); assert_eq!(parser.current_line(), "ident ident ident;"); - assert_eq!(parser.next(), Ok(Token::Ident("ident".into()))); - assert_eq!(parser.next(), Ok(Token::Ident("ident".into()))); - assert_eq!(parser.next(), Ok(Token::Semicolon)); + assert_eq!(parser.next(), Ok(&Token::Ident("ident".into()))); + assert_eq!(parser.next(), Ok(&Token::Ident("ident".into()))); + assert_eq!(parser.next(), Ok(&Token::Semicolon)); - assert_eq!(parser.next(), Ok(Token::Ident("ident".into()))); + assert_eq!(parser.next(), Ok(&Token::Ident("ident".into()))); assert_eq!(parser.current_line(), "ident"); } diff --git a/src/unicode_range.rs b/src/unicode_range.rs index 3fb54a84..0e1b58ac 100644 
--- a/src/unicode_range.rs +++ b/src/unicode_range.rs @@ -55,12 +55,12 @@ impl UnicodeRange { } fn parse_tokens<'i, 't>(input: &mut Parser<'i, 't>) -> Result<(), BasicParseError<'i>> { - match input.next_including_whitespace()? { + match input.next_including_whitespace()?.clone() { Token::Delim('+') => { - match input.next_including_whitespace()? { + match *input.next_including_whitespace()? { Token::Ident(_) => {} Token::Delim('?') => {} - t => return Err(BasicParseError::UnexpectedToken(t)) + ref t => return Err(BasicParseError::UnexpectedToken(t.clone())) } parse_question_marks(input) } @@ -70,9 +70,9 @@ fn parse_tokens<'i, 't>(input: &mut Parser<'i, 't>) -> Result<(), BasicParseErro Token::Number { .. } => { let after_number = input.position(); match input.next_including_whitespace() { - Ok(Token::Delim('?')) => parse_question_marks(input), - Ok(Token::Dimension { .. }) => {} - Ok(Token::Number { .. }) => {} + Ok(&Token::Delim('?')) => parse_question_marks(input), + Ok(&Token::Dimension { .. }) => {} + Ok(&Token::Number { .. }) => {} _ => input.reset(after_number) } } @@ -86,7 +86,7 @@ fn parse_question_marks(input: &mut Parser) { loop { let position = input.position(); match input.next_including_whitespace() { - Ok(Token::Delim('?')) => {} + Ok(&Token::Delim('?')) => {} _ => { input.reset(position); return From c305bcf14375d90a3804082961d4a18d6ea717a9 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 18 Jul 2017 19:35:52 +0200 Subject: [PATCH 11/15] Return &CowRcStr instead of CowRcStr in some Parser::expect_* methods --- src/parser.rs | 22 +++++++++++----------- src/rules_and_declarations.rs | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 8aedb1be..d9ca8ca4 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -486,9 +486,9 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Parse a and return the unescaped value. #[inline] - pub fn expect_ident(&mut self) -> Result, BasicParseError<'i>> { + pub fn expect_ident(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> { match *self.next()? { - Token::Ident(ref value) => Ok(value.clone()), + Token::Ident(ref value) => Ok(value), ref t => Err(BasicParseError::UnexpectedToken(t.clone())) } } @@ -504,19 +504,19 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Parse a and return the unescaped value. #[inline] - pub fn expect_string(&mut self) -> Result, BasicParseError<'i>> { + pub fn expect_string(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> { match *self.next()? { - Token::QuotedString(ref value) => Ok(value.clone()), + Token::QuotedString(ref value) => Ok(value), ref t => Err(BasicParseError::UnexpectedToken(t.clone())) } } /// Parse either a or a , and return the unescaped value. #[inline] - pub fn expect_ident_or_string(&mut self) -> Result, BasicParseError<'i>> { + pub fn expect_ident_or_string(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> { match *self.next()? 
{ - Token::Ident(ref value) => Ok(value.clone()), - Token::QuotedString(ref value) => Ok(value.clone()), + Token::Ident(ref value) => Ok(value), + Token::QuotedString(ref value) => Ok(value), ref t => Err(BasicParseError::UnexpectedToken(t.clone())) } } @@ -530,7 +530,7 @@ impl<'i: 't, 't> Parser<'i, 't> { Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {} ref t => return Err(BasicParseError::UnexpectedToken(t.clone())) } - self.parse_nested_block(|input| input.expect_string().map_err(ParseError::Basic)) + self.parse_nested_block(|input| input.expect_string().map_err(ParseError::Basic).map(|s| s.clone())) .map_err(ParseError::<()>::basic) } @@ -544,7 +544,7 @@ impl<'i: 't, 't> Parser<'i, 't> { Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {} ref t => return Err(BasicParseError::UnexpectedToken(t.clone())) } - self.parse_nested_block(|input| input.expect_string().map_err(ParseError::Basic)) + self.parse_nested_block(|input| input.expect_string().map_err(ParseError::Basic).map(|s| s.clone())) .map_err(ParseError::<()>::basic) } @@ -651,9 +651,9 @@ impl<'i: 't, 't> Parser<'i, 't> { /// /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method. #[inline] - pub fn expect_function(&mut self) -> Result, BasicParseError<'i>> { + pub fn expect_function(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> { match *self.next()? { - Token::Function(ref name) => Ok(name.clone()), + Token::Function(ref name) => Ok(name), ref t => Err(BasicParseError::UnexpectedToken(t.clone())) } } diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index f40b1fcb..5ddab806 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -380,7 +380,7 @@ pub fn parse_one_declaration<'i, 't, P, E>(input: &mut Parser<'i, 't>, parser: & where P: DeclarationParser<'i, Error = E> { let start_position = input.position(); input.parse_entirely(|input| { - let name = input.expect_ident()?; + let name = input.expect_ident()?.clone(); input.expect_colon()?; parser.parse_value(name, input) }).map_err(|e| PreciseParseError { From b806c395441f38950d18bb045fb7253a16256fe2 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 20 Jul 2017 20:50:10 +0200 Subject: [PATCH 12/15] Add expect_ident_cloned and expect_string_cloned --- src/parser.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/parser.rs b/src/parser.rs index d9ca8ca4..40f7de4d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -493,6 +493,12 @@ impl<'i: 't, 't> Parser<'i, 't> { } } + /// expect_ident, but clone the CowRcStr + #[inline] + pub fn expect_ident_cloned(&mut self) -> Result, BasicParseError<'i>> { + self.expect_ident().map(|s| s.clone()) + } + /// Parse a whose unescaped value is an ASCII-insensitive match for the given value. #[inline] pub fn expect_ident_matching(&mut self, expected_value: &str) -> Result<(), BasicParseError<'i>> { @@ -511,6 +517,12 @@ impl<'i: 't, 't> Parser<'i, 't> { } } + /// expect_string, but clone the CowRcStr + #[inline] + pub fn expect_string_cloned(&mut self) -> Result, BasicParseError<'i>> { + self.expect_string().map(|s| s.clone()) + } + /// Parse either a or a , and return the unescaped value. 
#[inline] pub fn expect_ident_or_string(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> { From 336a12eeffb7bbc5fb5263b6cff71f39de2bf677 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 20 Jul 2017 22:53:18 +0200 Subject: [PATCH 13/15] Remove CowRcStr::into_owned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It’s pretty much useless now that the refcount is almost never one. --- src/cow_rc_str.rs | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/cow_rc_str.rs b/src/cow_rc_str.rs index 3b562d7f..b1e85979 100644 --- a/src/cow_rc_str.rs +++ b/src/cow_rc_str.rs @@ -96,25 +96,6 @@ impl<'a> CowRcStr<'a> { } } } - - /// Convert into `String`, re-using an existing memory allocation if possible. - #[inline] - pub fn into_owned(self) -> String { - let unpacked = self.unpack(); - - // Inhibit destructor: we’re taking ownership of this strong reference (if any) - mem::forget(self); - - match unpacked { - Ok(s) => s.to_owned(), - Err(ptr) => { - let rc = unsafe { - Rc::from_raw(ptr) - }; - Rc::try_unwrap(rc).unwrap_or_else(|rc| (*rc).clone()) - } - } - } } impl<'a> Clone for CowRcStr<'a> { From a944f54839ce5b596914ec9a4491284b7ab5f773 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 21 Jul 2017 20:04:15 +0200 Subject: [PATCH 14/15] Fix Parser::seen_* in the presence of caching --- src/parser.rs | 5 +++++ src/tokenizer.rs | 35 +++++++++++++++++++++++------------ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 40f7de4d..f9997725 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -376,6 +376,11 @@ impl<'i: 't, 't> Parser<'i, 't> { match self.input.cached_token { Some(ref cached_token) if cached_token.start_position == token_start_position => { self.input.tokenizer.reset(cached_token.end_position); + match cached_token.token { + Token::Dimension { ref unit, .. 
} => self.input.tokenizer.see_dimension(unit),
+                    Token::Function(ref name) => self.input.tokenizer.see_function(name),
+                    _ => {}
+                }
                 token = &cached_token.token
             }
             _ => {
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 205ef41c..a0c4233a 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -244,6 +244,15 @@ impl<'a> Tokenizer<'a> {
         seen
     }
 
+    #[inline]
+    pub fn see_function(&mut self, name: &str) {
+        if self.var_functions == SeenStatus::LookingForThem {
+            if name.eq_ignore_ascii_case("var") {
+                self.var_functions = SeenStatus::SeenAtLeastOne;
+            }
+        }
+    }
+
     #[inline]
     pub fn look_for_viewport_percentages(&mut self) {
         self.viewport_percentages = SeenStatus::LookingForThem;
@@ -256,6 +265,18 @@ impl<'a> Tokenizer<'a> {
         seen
     }
 
+    #[inline]
+    pub fn see_dimension(&mut self, unit: &str) {
+        if self.viewport_percentages == SeenStatus::LookingForThem {
+            if unit.eq_ignore_ascii_case("vh") ||
+               unit.eq_ignore_ascii_case("vw") ||
+               unit.eq_ignore_ascii_case("vmin") ||
+               unit.eq_ignore_ascii_case("vmax") {
+                self.viewport_percentages = SeenStatus::SeenAtLeastOne;
+            }
+        }
+    }
+
     #[inline]
     pub fn next(&mut self) -> Result<Token<'a>, ()> {
         next_token(self)
@@ -699,10 +720,7 @@ fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
         if value.eq_ignore_ascii_case("url") {
            consume_unquoted_url(tokenizer).unwrap_or(Function(value))
         } else {
-            if tokenizer.var_functions == SeenStatus::LookingForThem &&
-               value.eq_ignore_ascii_case("var") {
-                tokenizer.var_functions = SeenStatus::SeenAtLeastOne;
-            }
+            tokenizer.see_function(&value);
             Function(value)
         }
     } else {
@@ -887,14 +905,7 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
     let value = value as f32;
     if is_ident_start(tokenizer) {
         let unit = consume_name(tokenizer);
-        if tokenizer.viewport_percentages == SeenStatus::LookingForThem {
-            if unit.eq_ignore_ascii_case("vh") ||
-               unit.eq_ignore_ascii_case("vw") ||
-               unit.eq_ignore_ascii_case("vmin") ||
-               unit.eq_ignore_ascii_case("vmax") {
-                tokenizer.viewport_percentages = SeenStatus::SeenAtLeastOne;
-            }
-        }
+        tokenizer.see_dimension(&unit);
         Dimension {
             value: value,
             int_value: int_value,

From 850eac1b0638505f62ee05d24dce45ff66fd1f05 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Fri, 21 Jul 2017 23:28:07 +0200
Subject: [PATCH 15/15] Remove unnecessary clone

---
 src/color.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/color.rs b/src/color.rs
index 6dbc2142..56847800 100644
--- a/src/color.rs
+++ b/src/color.rs
@@ -141,6 +141,7 @@ impl Color {
     ///
     /// FIXME(#2) Deprecated CSS2 System Colors are not supported yet.
     pub fn parse<'i, 't>(input: &mut Parser<'i, 't>) -> Result<Color, BasicParseError<'i>> {
+        // FIXME: remove clone() when lifetimes are non-lexical
         let token = input.next()?.clone();
         match token {
             Token::Hash(ref value) | Token::IDHash(ref value) => {
@@ -154,7 +155,7 @@ impl Color {
                 }).map_err(ParseError::<()>::basic);
             }
             _ => Err(())
-        }.map_err(|()| BasicParseError::UnexpectedToken(token.clone()))
+        }.map_err(|()| BasicParseError::UnexpectedToken(token))
    }
 
     /// Parse a color hash, without the leading '#' character.
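Taken together, these patches change the token-access contract: `Parser::next()` and the `next_including_*` variants now return `&Token<'i>` borrowed from the single-entry token cache, `expect_ident()` and friends return `&CowRcStr<'i>`, and callers clone only when they need ownership. Below is a minimal sketch of downstream caller code against the API as modified by this series; `parse_key_value` and its input string are invented for illustration and are not part of the patches:

```rust
// Hypothetical consumer crate, assuming the cssparser revision
// produced by this patch series (cssparser 0.18-era API).
extern crate cssparser;

use cssparser::{BasicParseError, CowRcStr, Parser, ParserInput, Token};

// `next()` now hands out `&Token<'i>`: match through `*`, bind with
// `ref`, and clone only what must outlive the borrow of the parser.
fn parse_key_value<'i, 't>(input: &mut Parser<'i, 't>)
                           -> Result<(CowRcStr<'i>, f32), BasicParseError<'i>> {
    // `expect_ident()` returns `&CowRcStr<'i>`; cloning it is a cheap
    // refcount bump and ends the borrow before `expect_colon()` re-borrows.
    let key = input.expect_ident()?.clone();
    input.expect_colon()?;
    match *input.next()? {
        Token::Number { value, .. } => Ok((key, value)),
        // Tokens are cloned only on the error path, mirroring the
        // `ref t => ... t.clone()` idiom used throughout the series.
        ref t => Err(BasicParseError::UnexpectedToken(t.clone())),
    }
}

fn main() {
    let mut input = ParserInput::new("width: 42");
    let mut parser = Parser::new(&mut input);
    let (key, value) = parse_key_value(&mut parser).unwrap();
    assert_eq!(&*key, "width");
    assert_eq!(value, 42.0);
}
```

With patch 12 applied, the `key` line can also be written as `input.expect_ident_cloned()?`, which expresses the same clone-on-demand choice more directly.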