From eb5b09688696562d08ebb35e34ded723e6e44277 Mon Sep 17 00:00:00 2001 From: Matthieu M Date: Sun, 3 Feb 2019 16:58:29 +0100 Subject: [PATCH 1/2] RangeInclusive internal iteration performance improvement. Specialize Iterator::try_fold and DoubleEndedIterator::try_rfold to improve code generation in all internal iteration scenarios. This changes brings the performance of internal iteration with RangeInclusive on par with the performance of iteration with Range: - Single conditional jump in hot loop, - Unrolling and vectorization, - And even Closed Form substitution. Unfortunately, it only applies to internal iteration. Despite various attempts at stream-lining the implementation of next and next_back, LLVM has stubbornly refused to optimize external iteration appropriately, leaving me with a choice between: - The current implementation, for which Closed Form substitution is performed, but which uses 2 conditional jumps in the hot loop when optimization fail. - An implementation using a "is_done" boolean, which uses 1 conditional jump in the hot loop when optimization fail, allowing unrolling and vectorization, but for which Closed Form substitution fails. In the absence of any conclusive evidence as to which usecase matters most, and with no assurance that the lack of Closed Form substitution is not indicative of other optimizations being foiled, there is no way to pick one implementation over the other, and thus I defer to the statu quo as far as next and next_back are concerned. --- src/libcore/iter/range.rs | 51 ++++++++++++++++++++++++++++++++++++--- src/libcore/ops/range.rs | 2 ++ 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/src/libcore/iter/range.rs b/src/libcore/iter/range.rs index 66c09a0ddd0fb..52b0ccd48d476 100644 --- a/src/libcore/iter/range.rs +++ b/src/libcore/iter/range.rs @@ -1,6 +1,6 @@ use convert::TryFrom; use mem; -use ops::{self, Add, Sub}; +use ops::{self, Add, Sub, Try}; use usize; use super::{FusedIterator, TrustedLen}; @@ -368,11 +368,11 @@ impl Iterator for ops::RangeInclusive { Some(Less) => { self.is_empty = Some(false); self.start = plus_n.add_one(); - return Some(plus_n) + return Some(plus_n); } Some(Equal) => { self.is_empty = Some(true); - return Some(plus_n) + return Some(plus_n); } _ => {} } @@ -382,6 +382,29 @@ impl Iterator for ops::RangeInclusive { None } + #[inline] + fn try_fold(&mut self, init: B, mut f: F) -> R + where + Self: Sized, F: FnMut(B, Self::Item) -> R, R: Try + { + self.compute_is_empty(); + + let mut accum = init; + + while self.start < self.end { + let n = self.start.add_one(); + let n = mem::replace(&mut self.start, n); + accum = f(accum, n)?; + } + + if self.start == self.end { + accum = f(accum, self.start.clone())?; + } + + self.is_empty = Some(true); + Try::from_ok(accum) + } + #[inline] fn last(mut self) -> Option { self.next_back() @@ -415,6 +438,28 @@ impl DoubleEndedIterator for ops::RangeInclusive { self.end.clone() }) } + + #[inline] + fn try_rfold(&mut self, init: B, mut f: F) -> R where + Self: Sized, F: FnMut(B, Self::Item) -> R, R: Try + { + self.compute_is_empty(); + + let mut accum = init; + + while self.start < self.end { + let n = self.end.sub_one(); + let n = mem::replace(&mut self.end, n); + accum = f(accum, n)?; + } + + if self.start == self.end { + accum = f(accum, self.start.clone())?; + } + + self.is_empty = Some(true); + Try::from_ok(accum) + } } #[stable(feature = "fused", since = "1.26.0")] diff --git a/src/libcore/ops/range.rs b/src/libcore/ops/range.rs index 815a4cfeed88e..6776ebdc66edc 100644 --- a/src/libcore/ops/range.rs +++ b/src/libcore/ops/range.rs @@ -334,12 +334,14 @@ pub struct RangeInclusive { trait RangeInclusiveEquality: Sized { fn canonicalized_is_empty(range: &RangeInclusive) -> bool; } + impl RangeInclusiveEquality for T { #[inline] default fn canonicalized_is_empty(range: &RangeInclusive) -> bool { range.is_empty.unwrap_or_default() } } + impl RangeInclusiveEquality for T { #[inline] fn canonicalized_is_empty(range: &RangeInclusive) -> bool { From 4fed67f94220351ffa60de1dca078c02a7c15734 Mon Sep 17 00:00:00 2001 From: Matthieu M Date: Sat, 9 Feb 2019 18:42:34 +0100 Subject: [PATCH 2/2] Fix exhaustion of inclusive range try_fold and try_rfold --- src/libcore/iter/range.rs | 14 ++++++++++++-- src/libcore/tests/iter.rs | 24 +++++++++++++++++++++--- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/libcore/iter/range.rs b/src/libcore/iter/range.rs index 52b0ccd48d476..f0ed88c3dfd85 100644 --- a/src/libcore/iter/range.rs +++ b/src/libcore/iter/range.rs @@ -389,6 +389,10 @@ impl Iterator for ops::RangeInclusive { { self.compute_is_empty(); + if self.is_empty() { + return Try::from_ok(init); + } + let mut accum = init; while self.start < self.end { @@ -397,11 +401,12 @@ impl Iterator for ops::RangeInclusive { accum = f(accum, n)?; } + self.is_empty = Some(true); + if self.start == self.end { accum = f(accum, self.start.clone())?; } - self.is_empty = Some(true); Try::from_ok(accum) } @@ -445,6 +450,10 @@ impl DoubleEndedIterator for ops::RangeInclusive { { self.compute_is_empty(); + if self.is_empty() { + return Try::from_ok(init); + } + let mut accum = init; while self.start < self.end { @@ -453,11 +462,12 @@ impl DoubleEndedIterator for ops::RangeInclusive { accum = f(accum, n)?; } + self.is_empty = Some(true); + if self.start == self.end { accum = f(accum, self.start.clone())?; } - self.is_empty = Some(true); Try::from_ok(accum) } } diff --git a/src/libcore/tests/iter.rs b/src/libcore/tests/iter.rs index cf19851c17b35..89e190e074f1a 100644 --- a/src/libcore/tests/iter.rs +++ b/src/libcore/tests/iter.rs @@ -1738,19 +1738,37 @@ fn test_range_inclusive_folds() { assert_eq!((1..=10).sum::(), 55); assert_eq!((1..=10).rev().sum::(), 55); - let mut it = 40..=50; + let mut it = 44..=50; assert_eq!(it.try_fold(0, i8::checked_add), None); - assert_eq!(it, 44..=50); + assert_eq!(it, 47..=50); + assert_eq!(it.try_fold(0, i8::checked_add), None); + assert_eq!(it, 50..=50); + assert_eq!(it.try_fold(0, i8::checked_add), Some(50)); + assert!(it.is_empty()); + assert_eq!(it.try_fold(0, i8::checked_add), Some(0)); + assert!(it.is_empty()); + + let mut it = 40..=47; + assert_eq!(it.try_rfold(0, i8::checked_add), None); + assert_eq!(it, 40..=44); assert_eq!(it.try_rfold(0, i8::checked_add), None); - assert_eq!(it, 44..=47); + assert_eq!(it, 40..=41); + assert_eq!(it.try_rfold(0, i8::checked_add), Some(81)); + assert!(it.is_empty()); + assert_eq!(it.try_rfold(0, i8::checked_add), Some(0)); + assert!(it.is_empty()); let mut it = 10..=20; assert_eq!(it.try_fold(0, |a,b| Some(a+b)), Some(165)); assert!(it.is_empty()); + assert_eq!(it.try_fold(0, |a,b| Some(a+b)), Some(0)); + assert!(it.is_empty()); let mut it = 10..=20; assert_eq!(it.try_rfold(0, |a,b| Some(a+b)), Some(165)); assert!(it.is_empty()); + assert_eq!(it.try_rfold(0, |a,b| Some(a+b)), Some(0)); + assert!(it.is_empty()); } #[test]