diff --git a/src/liballoc/benches/lib.rs b/src/liballoc/benches/lib.rs
index 09685d1bb40b0..a43aadfe9a23a 100644
--- a/src/liballoc/benches/lib.rs
+++ b/src/liballoc/benches/lib.rs
@@ -13,6 +13,7 @@
 #![cfg_attr(stage0, feature(i128_type))]
 #![feature(rand)]
 #![feature(repr_simd)]
+#![feature(slice_sort_by_cached_key)]
 #![feature(test)]
 
 extern crate rand;
diff --git a/src/liballoc/benches/slice.rs b/src/liballoc/benches/slice.rs
index ee5182a1d4663..a699ff9c0a76e 100644
--- a/src/liballoc/benches/slice.rs
+++ b/src/liballoc/benches/slice.rs
@@ -284,6 +284,17 @@ macro_rules! sort_expensive {
     }
 }
 
+macro_rules! sort_lexicographic {
+    ($f:ident, $name:ident, $gen:expr, $len:expr) => {
+        #[bench]
+        fn $name(b: &mut Bencher) {
+            let v = $gen($len);
+            b.iter(|| v.clone().$f(|x| x.to_string()));
+            b.bytes = $len * mem::size_of_val(&$gen(1)[0]) as u64;
+        }
+    }
+}
+
 sort!(sort, sort_small_ascending, gen_ascending, 10);
 sort!(sort, sort_small_descending, gen_descending, 10);
 sort!(sort, sort_small_random, gen_random, 10);
@@ -312,6 +323,10 @@ sort!(sort_unstable, sort_unstable_large_big, gen_big_random, 10000);
 sort_strings!(sort_unstable, sort_unstable_large_strings, gen_strings, 10000);
 sort_expensive!(sort_unstable_by, sort_unstable_large_expensive, gen_random, 10000);
 
+sort_lexicographic!(sort_by_key, sort_by_key_lexicographic, gen_random, 10000);
+sort_lexicographic!(sort_unstable_by_key, sort_unstable_by_key_lexicographic, gen_random, 10000);
+sort_lexicographic!(sort_by_cached_key, sort_by_cached_key_lexicographic, gen_random, 10000);
+
 macro_rules! reverse {
     ($name:ident, $ty:ty, $f:expr) => {
         #[bench]
diff --git a/src/liballoc/slice.rs b/src/liballoc/slice.rs
index dc40062ef13df..68f2313843c31 100644
--- a/src/liballoc/slice.rs
+++ b/src/liballoc/slice.rs
@@ -102,6 +102,7 @@ use core::mem::size_of;
 use core::mem;
 use core::ptr;
 use core::slice as core_slice;
+use core::{u8, u16, u32};
 
 use borrow::{Borrow, BorrowMut, ToOwned};
 use boxed::Box;
@@ -1302,7 +1303,8 @@ impl<T> [T] {
 
     /// Sorts the slice with a key extraction function.
     ///
-    /// This sort is stable (i.e. does not reorder equal elements) and `O(n log n)` worst-case.
+    /// This sort is stable (i.e. does not reorder equal elements) and `O(m n log(m n))`
+    /// worst-case, where the key function is `O(m)`.
     ///
     /// When applicable, unstable sorting is preferred because it is generally faster than stable
     /// sorting and it doesn't allocate auxiliary memory.
@@ -1328,12 +1330,82 @@ impl<T> [T] {
     /// ```
     #[stable(feature = "slice_sort_by_key", since = "1.7.0")]
     #[inline]
-    pub fn sort_by_key<B, F>(&mut self, mut f: F)
-        where F: FnMut(&T) -> B, B: Ord
+    pub fn sort_by_key<K, F>(&mut self, mut f: F)
+        where F: FnMut(&T) -> K, K: Ord
     {
         merge_sort(self, |a, b| f(a).lt(&f(b)));
     }
 
+    /// Sorts the slice with a key extraction function.
+    ///
+    /// During sorting, the key function is called only once per element.
+    ///
+    /// This sort is stable (i.e. does not reorder equal elements) and `O(m n + n log n)`
+    /// worst-case, where the key function is `O(m)`.
+    ///
+    /// For simple key functions (e.g. functions that are property accesses or
+    /// basic operations), [`sort_by_key`](#method.sort_by_key) is likely to be
+    /// faster.
+    ///
+    /// # Current implementation
+    ///
+    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
+    /// which combines the fast average case of randomized quicksort with the fast worst case of
+    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
+    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
+    /// deterministic behavior.
+    ///
+    /// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the
+    /// length of the slice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(slice_sort_by_cached_key)]
+    /// let mut v = [-5i32, 4, 32, -3, 2];
+    ///
+    /// v.sort_by_cached_key(|k| k.to_string());
+    /// assert!(v == [-3, -5, 2, 32, 4]);
+    /// ```
+    ///
+    /// [pdqsort]: https://github.com/orlp/pdqsort
+    #[unstable(feature = "slice_sort_by_cached_key", issue = "34447")]
+    #[inline]
+    pub fn sort_by_cached_key<K, F>(&mut self, f: F)
+        where F: FnMut(&T) -> K, K: Ord
+    {
+        // Helper macro for indexing our vector by the smallest possible type, to reduce allocation.
+        macro_rules! sort_by_key {
+            ($t:ty, $slice:ident, $f:ident) => ({
+                let mut indices: Vec<_> =
+                    $slice.iter().map($f).enumerate().map(|(i, k)| (k, i as $t)).collect();
+                // The elements of `indices` are unique, as they are indexed, so any sort will be
+                // stable with respect to the original slice. We use `sort_unstable` here because
+                // it requires less memory allocation.
+                indices.sort_unstable();
+                for i in 0..$slice.len() {
+                    let mut index = indices[i].1;
+                    while (index as usize) < i {
+                        index = indices[index as usize].1;
+                    }
+                    indices[i].1 = index;
+                    $slice.swap(i, index as usize);
+                }
+            })
+        }
+
+        let sz_u8    = mem::size_of::<(K, u8)>();
+        let sz_u16   = mem::size_of::<(K, u16)>();
+        let sz_u32   = mem::size_of::<(K, u32)>();
+        let sz_usize = mem::size_of::<(K, usize)>();
+
+        let len = self.len();
+        if sz_u8  < sz_u16   && len <= ( u8::MAX as usize) { return sort_by_key!( u8, self, f) }
+        if sz_u16 < sz_u32   && len <= (u16::MAX as usize) { return sort_by_key!(u16, self, f) }
+        if sz_u32 < sz_usize && len <= (u32::MAX as usize) { return sort_by_key!(u32, self, f) }
+        sort_by_key!(usize, self, f)
+    }
+
     /// Sorts the slice, but may not preserve the order of equal elements.
     ///
     /// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate),
@@ -1410,7 +1482,7 @@ impl<T> [T] {
     /// elements.
     ///
     /// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate),
-    /// and `O(n log n)` worst-case.
+    /// and `O(m n log(m n))` worst-case, where the key function is `O(m)`.
     ///
     /// # Current implementation
     ///
@@ -1420,9 +1492,6 @@ impl<T> [T] {
     /// randomization to avoid degenerate cases, but with a fixed seed to always provide
     /// deterministic behavior.
     ///
-    /// It is typically faster than stable sorting, except in a few special cases, e.g. when the
-    /// slice consists of several concatenated sorted sequences.
-    ///
     /// # Examples
     ///
     /// ```
@@ -1435,9 +1504,8 @@ impl<T> [T] {
     /// [pdqsort]: https://github.com/orlp/pdqsort
     #[stable(feature = "sort_unstable", since = "1.20.0")]
     #[inline]
-    pub fn sort_unstable_by_key<B, F>(&mut self, f: F)
-        where F: FnMut(&T) -> B,
-              B: Ord
+    pub fn sort_unstable_by_key<K, F>(&mut self, f: F)
+        where F: FnMut(&T) -> K, K: Ord
     {
         core_slice::SliceExt::sort_unstable_by_key(self, f);
     }
diff --git a/src/liballoc/tests/lib.rs b/src/liballoc/tests/lib.rs
index bcd2ef2760583..0a7e9a8be94f9 100644
--- a/src/liballoc/tests/lib.rs
+++ b/src/liballoc/tests/lib.rs
@@ -23,6 +23,7 @@
 #![feature(pattern)]
 #![feature(placement_in_syntax)]
 #![feature(rand)]
+#![feature(slice_sort_by_cached_key)]
 #![feature(splice)]
 #![feature(str_escape)]
 #![feature(string_retain)]
diff --git a/src/liballoc/tests/slice.rs b/src/liballoc/tests/slice.rs
index 3f679d81f08de..99d9c51efc757 100644
--- a/src/liballoc/tests/slice.rs
+++ b/src/liballoc/tests/slice.rs
@@ -425,6 +425,14 @@ fn test_sort() {
                 v.sort_by(|a, b| b.cmp(a));
                 assert!(v.windows(2).all(|w| w[0] >= w[1]));
 
+                // Sort in lexicographic order.
+                let mut v1 = orig.clone();
+                let mut v2 = orig.clone();
+                v1.sort_by_key(|x| x.to_string());
+                v2.sort_by_cached_key(|x| x.to_string());
+                assert!(v1.windows(2).all(|w| w[0].to_string() <= w[1].to_string()));
+                assert!(v1 == v2);
+
                 // Sort with many pre-sorted runs.
                 let mut v = orig.clone();
                 v.sort();
@@ -477,7 +485,7 @@ fn test_sort_stability() {
             // the second item represents which occurrence of that
             // number this element is, i.e. the second elements
             // will occur in sorted order.
-            let mut v: Vec<_> = (0..len)
+            let mut orig: Vec<_> = (0..len)
                 .map(|_| {
                     let n = thread_rng().gen::<usize>() % 10;
                     counts[n] += 1;
@@ -485,16 +493,21 @@ fn test_sort_stability() {
                 })
                 .collect();
 
-            // only sort on the first element, so an unstable sort
+            let mut v = orig.clone();
+            // Only sort on the first element, so an unstable sort
             // may mix up the counts.
             v.sort_by(|&(a, _), &(b, _)| a.cmp(&b));
 
-            // this comparison includes the count (the second item
+            // This comparison includes the count (the second item
             // of the tuple), so elements with equal first items
             // will need to be ordered with increasing
             // counts... i.e. exactly asserting that this sort is
             // stable.
             assert!(v.windows(2).all(|w| w[0] <= w[1]));
+
+            let mut v = orig.clone();
+            v.sort_by_cached_key(|&(x, _)| x);
+            assert!(v.windows(2).all(|w| w[0] <= w[1]));
         }
     }
 }