From a1ad6346d6a0df509d2615acbce9e443d202323a Mon Sep 17 00:00:00 2001 From: Zachary S Date: Fri, 10 May 2024 13:44:19 -0500 Subject: [PATCH 01/14] Add fn allocator method to rc/sync::Weak. Relax Rc/Arc::allocator to allow unsized T. --- library/alloc/src/rc.rs | 28 ++++++++++++++++++---------- library/alloc/src/sync.rs | 28 ++++++++++++++++++---------- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs index c245b42c3e880..888bb7636c0b4 100644 --- a/library/alloc/src/rc.rs +++ b/library/alloc/src/rc.rs @@ -661,16 +661,6 @@ impl Rc { } impl Rc { - /// Returns a reference to the underlying allocator. - /// - /// Note: this is an associated function, which means that you have - /// to call it as `Rc::allocator(&r)` instead of `r.allocator()`. This - /// is so that there is no conflict with a method on the inner type. - #[inline] - #[unstable(feature = "allocator_api", issue = "32838")] - pub fn allocator(this: &Self) -> &A { - &this.alloc - } /// Constructs a new `Rc` in the provided allocator. /// /// # Examples @@ -1333,6 +1323,17 @@ impl Rc { } impl Rc { + /// Returns a reference to the underlying allocator. + /// + /// Note: this is an associated function, which means that you have + /// to call it as `Rc::allocator(&r)` instead of `r.allocator()`. This + /// is so that there is no conflict with a method on the inner type. + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub fn allocator(this: &Self) -> &A { + &this.alloc + } + /// Consumes the `Rc`, returning the wrapped pointer. /// /// To avoid a memory leak the pointer must be converted back to an `Rc` using @@ -2923,6 +2924,13 @@ impl Weak { } impl Weak { + /// Returns a reference to the underlying allocator. + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub fn allocator(&self) -> &A { + &self.alloc + } + /// Returns a raw pointer to the object `T` pointed to by this `Weak`. /// /// The pointer is valid only if there are some strong references. The pointer may be dangling, diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs index 297a273d274bf..45ba13f696393 100644 --- a/library/alloc/src/sync.rs +++ b/library/alloc/src/sync.rs @@ -683,16 +683,6 @@ impl Arc { } impl Arc { - /// Returns a reference to the underlying allocator. - /// - /// Note: this is an associated function, which means that you have - /// to call it as `Arc::allocator(&a)` instead of `a.allocator()`. This - /// is so that there is no conflict with a method on the inner type. - #[inline] - #[unstable(feature = "allocator_api", issue = "32838")] - pub fn allocator(this: &Self) -> &A { - &this.alloc - } /// Constructs a new `Arc` in the provided allocator. /// /// # Examples @@ -1473,6 +1463,17 @@ impl Arc { } impl Arc { + /// Returns a reference to the underlying allocator. + /// + /// Note: this is an associated function, which means that you have + /// to call it as `Arc::allocator(&a)` instead of `a.allocator()`. This + /// is so that there is no conflict with a method on the inner type. + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub fn allocator(this: &Self) -> &A { + &this.alloc + } + /// Consumes the `Arc`, returning the wrapped pointer. /// /// To avoid a memory leak the pointer must be converted back to an `Arc` using @@ -2661,6 +2662,13 @@ impl Weak { } impl Weak { + /// Returns a reference to the underlying allocator. 
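+ ///
+ /// Unlike `Arc::allocator`, this can be a plain method: `Weak` does not
+ /// implement `Deref`, so it cannot conflict with a method on the inner type.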
+ #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub fn allocator(&self) -> &A { + &self.alloc + } + /// Returns a raw pointer to the object `T` pointed to by this `Weak`. /// /// The pointer is valid only if there are some strong references. The pointer may be dangling, From 5c46acac0431a5fe0fbf8f5239a7d3364c9186cf Mon Sep 17 00:00:00 2001 From: The 8472 Date: Sat, 22 Jun 2024 14:38:14 +0200 Subject: [PATCH 02/14] document the cvt methods --- library/std/src/sys/pal/unix/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/library/std/src/sys/pal/unix/mod.rs b/library/std/src/sys/pal/unix/mod.rs index b370f06e92baf..ae257cab1e50b 100644 --- a/library/std/src/sys/pal/unix/mod.rs +++ b/library/std/src/sys/pal/unix/mod.rs @@ -307,10 +307,13 @@ macro_rules! impl_is_minus_one { impl_is_minus_one! { i8 i16 i32 i64 isize } +/// Convert native return values to Result using the *-1 means error is in `errno`* convention. +/// Non-error values are `Ok`-wrapped. pub fn cvt(t: T) -> crate::io::Result { if t.is_minus_one() { Err(crate::io::Error::last_os_error()) } else { Ok(t) } } +/// `-1` → look at `errno` → retry on `EINTR`. Otherwise `Ok()`-wrap the closure return value. pub fn cvt_r(mut f: F) -> crate::io::Result where T: IsMinusOne, @@ -325,6 +328,7 @@ where } #[allow(dead_code)] // Not used on all platforms. +/// Zero means `Ok()`, all other values are treated as raw OS errors. Does not look at `errno`. pub fn cvt_nz(error: libc::c_int) -> crate::io::Result<()> { if error == 0 { Ok(()) } else { Err(crate::io::Error::from_raw_os_error(error)) } } From 6687a3f7da60a4d0f06fd84fea75bec1dd0fce2a Mon Sep 17 00:00:00 2001 From: The 8472 Date: Sat, 22 Jun 2024 14:37:12 +0200 Subject: [PATCH 03/14] use pidfd_spawn for faster process creation when pidfds are requested --- .../std/src/sys/pal/unix/linux/pidfd/tests.rs | 13 ++- .../src/sys/pal/unix/process/process_unix.rs | 99 ++++++++++++++++++- 2 files changed, 106 insertions(+), 6 deletions(-) diff --git a/library/std/src/sys/pal/unix/linux/pidfd/tests.rs b/library/std/src/sys/pal/unix/linux/pidfd/tests.rs index 6d9532f2ef1ff..672cb0efed1d1 100644 --- a/library/std/src/sys/pal/unix/linux/pidfd/tests.rs +++ b/library/std/src/sys/pal/unix/linux/pidfd/tests.rs @@ -1,7 +1,7 @@ use crate::assert_matches::assert_matches; use crate::os::fd::{AsRawFd, RawFd}; -use crate::os::linux::process::{ChildExt, CommandExt}; -use crate::os::unix::process::ExitStatusExt; +use crate::os::linux::process::{ChildExt, CommandExt as _}; +use crate::os::unix::process::{CommandExt as _, ExitStatusExt}; use crate::process::Command; #[test] @@ -42,6 +42,15 @@ fn test_command_pidfd() { .unwrap() .pidfd() .expect_err("pidfd should not have been created"); + + // exercise the fork/exec path since the earlier attempts may have used pidfd_spawnp() + let mut child = + unsafe { Command::new("false").pre_exec(|| Ok(())) }.create_pidfd(true).spawn().unwrap(); + + if pidfd_open_available { + assert!(child.pidfd().is_ok()) + } + child.wait().expect("error waiting on child"); } #[test] diff --git a/library/std/src/sys/pal/unix/process/process_unix.rs b/library/std/src/sys/pal/unix/process/process_unix.rs index 32382d9a50cf4..3de66d9789fbd 100644 --- a/library/std/src/sys/pal/unix/process/process_unix.rs +++ b/library/std/src/sys/pal/unix/process/process_unix.rs @@ -449,17 +449,70 @@ impl Command { use crate::mem::MaybeUninit; use crate::sys::weak::weak; use crate::sys::{self, cvt_nz, on_broken_pipe_flag_used}; + #[cfg(target_os = "linux")] + use 
core::sync::atomic::{AtomicU8, Ordering}; if self.get_gid().is_some() || self.get_uid().is_some() || (self.env_saw_path() && !self.program_is_path()) || !self.get_closures().is_empty() || self.get_groups().is_some() - || self.get_create_pidfd() { return Ok(None); } + cfg_if::cfg_if! { + if #[cfg(target_os = "linux")] { + weak! { + fn pidfd_spawnp( + *mut libc::c_int, + *const libc::c_char, + *const libc::posix_spawn_file_actions_t, + *const libc::posix_spawnattr_t, + *const *mut libc::c_char, + *const *mut libc::c_char + ) -> libc::c_int + } + + weak! { fn pidfd_getpid(libc::c_int) -> libc::c_int } + + static PIDFD_SPAWN_SUPPORTED: AtomicU8 = AtomicU8::new(0); + const UNKNOWN: u8 = 0; + const YES: u8 = 1; + // NO currently forces a fallback to fork/exec. We could be more nuanced here and keep using spawn + // if we know pidfd's aren't supported at all and the fallback would be futile. + const NO: u8 = 2; + + if self.get_create_pidfd() { + let flag = PIDFD_SPAWN_SUPPORTED.load(Ordering::Relaxed); + if flag == NO || pidfd_spawnp.get().is_none() || pidfd_getpid.get().is_none() { + return Ok(None); + } + if flag == UNKNOWN { + let mut support = NO; + let our_pid = crate::process::id(); + let pidfd = + unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) } as libc::c_int; + if pidfd >= 0 { + let pid = unsafe { pidfd_getpid.get().unwrap()(pidfd) } as u32; + unsafe { libc::close(pidfd) }; + if pid == our_pid { + support = YES + }; + } + PIDFD_SPAWN_SUPPORTED.store(support, Ordering::Relaxed); + if support != YES { + return Ok(None); + } + } + } + } else { + if self.get_create_pidfd() { + unreachable!("only implemented on linux") + } + } + } + // Only glibc 2.24+ posix_spawn() supports returning ENOENT directly. #[cfg(all(target_os = "linux", target_env = "gnu"))] { @@ -543,9 +596,6 @@ impl Command { let pgroup = self.get_pgroup(); - // Safety: -1 indicates we don't have a pidfd. - let mut p = unsafe { Process::new(0, -1) }; - struct PosixSpawnFileActions<'a>(&'a mut MaybeUninit); impl Drop for PosixSpawnFileActions<'_> { @@ -640,6 +690,47 @@ impl Command { #[cfg(target_os = "nto")] let spawn_fn = retrying_libc_posix_spawnp; + #[cfg(target_os = "linux")] + if self.get_create_pidfd() { + let mut pidfd: libc::c_int = -1; + let spawn_res = pidfd_spawnp.get().unwrap()( + &mut pidfd, + self.get_program_cstr().as_ptr(), + file_actions.0.as_ptr(), + attrs.0.as_ptr(), + self.get_argv().as_ptr() as *const _, + envp as *const _, + ); + + let spawn_res = cvt_nz(spawn_res); + if let Err(ref e) = spawn_res + && e.raw_os_error() == Some(libc::ENOSYS) + { + PIDFD_SPAWN_SUPPORTED.store(NO, Ordering::Relaxed); + return Ok(None); + } + spawn_res?; + + let pid = match cvt(pidfd_getpid.get().unwrap()(pidfd)) { + Ok(pid) => pid, + Err(e) => { + // The child has been spawned and we are holding its pidfd. + // But we cannot obtain its pid even though pidfd_getpid support was verified earlier. + // This might happen if libc can't open procfs because the file descriptor limit has been reached. + libc::close(pidfd); + return Err(Error::new( + e.kind(), + "pidfd_spawnp succeeded but the child's PID could not be obtained", + )); + } + }; + + return Ok(Some(Process::new(pid, pidfd))); + } + + // Safety: -1 indicates we don't have a pidfd. 
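+ // The pid of 0 is a placeholder; the spawn call below writes the child's
+ // real pid through `&mut p.pid`.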
+ let mut p = Process::new(0, -1); + let spawn_res = spawn_fn( &mut p.pid, self.get_program_cstr().as_ptr(), From 0ce361938eddf08da88e4c35e0ed63dbb204b2f2 Mon Sep 17 00:00:00 2001 From: The 8472 Date: Mon, 24 Jun 2024 23:10:17 +0200 Subject: [PATCH 04/14] document safety properties of the internal Process::new constructor --- library/std/src/sys/pal/unix/process/process_unix.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/library/std/src/sys/pal/unix/process/process_unix.rs b/library/std/src/sys/pal/unix/process/process_unix.rs index 3de66d9789fbd..23dce4ea5fb71 100644 --- a/library/std/src/sys/pal/unix/process/process_unix.rs +++ b/library/std/src/sys/pal/unix/process/process_unix.rs @@ -877,6 +877,12 @@ pub struct Process { impl Process { #[cfg(target_os = "linux")] + /// # Safety + /// + /// `pidfd` must either be -1 (representing no file descriptor) or a valid, exclusively owned file + /// descriptor (See [I/O Safety]). + /// + /// [I/O Safety]: crate::io#io-safety unsafe fn new(pid: pid_t, pidfd: pid_t) -> Self { use crate::os::unix::io::FromRawFd; use crate::sys_common::FromInner; From 3e4e31b7bf34dafa4dc3fc97e454a046886692da Mon Sep 17 00:00:00 2001 From: The 8472 Date: Tue, 25 Jun 2024 00:14:55 +0200 Subject: [PATCH 05/14] more fine-grained feature-detection for pidfd spawning we now distinguish between pidfd_spawn support, pidfd-via-fork/exec and not-supported --- .../src/sys/pal/unix/process/process_unix.rs | 54 +++++++++++-------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/library/std/src/sys/pal/unix/process/process_unix.rs b/library/std/src/sys/pal/unix/process/process_unix.rs index 23dce4ea5fb71..abd4a334783e4 100644 --- a/library/std/src/sys/pal/unix/process/process_unix.rs +++ b/library/std/src/sys/pal/unix/process/process_unix.rs @@ -476,35 +476,47 @@ impl Command { weak! { fn pidfd_getpid(libc::c_int) -> libc::c_int } - static PIDFD_SPAWN_SUPPORTED: AtomicU8 = AtomicU8::new(0); + static PIDFD_SUPPORTED: AtomicU8 = AtomicU8::new(0); const UNKNOWN: u8 = 0; - const YES: u8 = 1; - // NO currently forces a fallback to fork/exec. We could be more nuanced here and keep using spawn - // if we know pidfd's aren't supported at all and the fallback would be futile. - const NO: u8 = 2; + const SPAWN: u8 = 1; + // Obtaining a pidfd via the fork+exec path might work + const FORK_EXEC: u8 = 2; + // Neither pidfd_spawn nor fork/exec will get us a pidfd. + // Instead we'll just posix_spawn if the other preconditions are met. 
+ const NO: u8 = 3; if self.get_create_pidfd() { - let flag = PIDFD_SPAWN_SUPPORTED.load(Ordering::Relaxed); - if flag == NO || pidfd_spawnp.get().is_none() || pidfd_getpid.get().is_none() { + let mut support = PIDFD_SUPPORTED.load(Ordering::Relaxed); + if support == FORK_EXEC { return Ok(None); } - if flag == UNKNOWN { - let mut support = NO; + if support == UNKNOWN { + support = NO; let our_pid = crate::process::id(); - let pidfd = - unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) } as libc::c_int; - if pidfd >= 0 { - let pid = unsafe { pidfd_getpid.get().unwrap()(pidfd) } as u32; - unsafe { libc::close(pidfd) }; - if pid == our_pid { - support = YES - }; + let pidfd = cvt(unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) } as c_int); + match pidfd { + Ok(pidfd) => { + support = FORK_EXEC; + if let Some(Ok(pid)) = pidfd_getpid.get().map(|f| cvt(unsafe { f(pidfd) } as i32)) { + if pidfd_spawnp.get().is_some() && pid as u32 == our_pid { + support = SPAWN + } + } + unsafe { libc::close(pidfd) }; + } + Err(e) if e.raw_os_error() == Some(libc::EMFILE) => { + // We're temporarily(?) out of file descriptors. In this case obtaining a pidfd would also fail + // Don't update the support flag so we can probe again later. + return Err(e) + } + _ => {} } - PIDFD_SPAWN_SUPPORTED.store(support, Ordering::Relaxed); - if support != YES { + PIDFD_SUPPORTED.store(support, Ordering::Relaxed); + if support == FORK_EXEC { return Ok(None); } } + core::assert_matches::debug_assert_matches!(support, SPAWN | NO); } } else { if self.get_create_pidfd() { @@ -691,7 +703,7 @@ impl Command { let spawn_fn = retrying_libc_posix_spawnp; #[cfg(target_os = "linux")] - if self.get_create_pidfd() { + if self.get_create_pidfd() && PIDFD_SUPPORTED.load(Ordering::Relaxed) == SPAWN { let mut pidfd: libc::c_int = -1; let spawn_res = pidfd_spawnp.get().unwrap()( &mut pidfd, @@ -706,7 +718,7 @@ impl Command { if let Err(ref e) = spawn_res && e.raw_os_error() == Some(libc::ENOSYS) { - PIDFD_SPAWN_SUPPORTED.store(NO, Ordering::Relaxed); + PIDFD_SUPPORTED.store(FORK_EXEC, Ordering::Relaxed); return Ok(None); } spawn_res?; From ec0c755704bba1b6c4faa0b10aa0d886cdfa309e Mon Sep 17 00:00:00 2001 From: The 8472 Date: Tue, 25 Jun 2024 00:17:31 +0200 Subject: [PATCH 06/14] Check that we get somewhat sane PIDs when spawning with pidfds --- library/std/src/sys/pal/unix/linux/pidfd/tests.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/library/std/src/sys/pal/unix/linux/pidfd/tests.rs b/library/std/src/sys/pal/unix/linux/pidfd/tests.rs index 672cb0efed1d1..fb928c76fbd04 100644 --- a/library/std/src/sys/pal/unix/linux/pidfd/tests.rs +++ b/library/std/src/sys/pal/unix/linux/pidfd/tests.rs @@ -21,6 +21,7 @@ fn test_command_pidfd() { let flags = super::cvt(unsafe { libc::fcntl(pidfd.as_raw_fd(), libc::F_GETFD) }).unwrap(); assert!(flags & libc::FD_CLOEXEC != 0); } + assert!(child.id() > 0 && child.id() < -1i32 as u32); let status = child.wait().expect("error waiting on pidfd"); assert_eq!(status.code(), Some(1)); @@ -47,6 +48,8 @@ fn test_command_pidfd() { let mut child = unsafe { Command::new("false").pre_exec(|| Ok(())) }.create_pidfd(true).spawn().unwrap(); + assert!(child.id() > 0 && child.id() < -1i32 as u32); + if pidfd_open_available { assert!(child.pidfd().is_ok()) } From 53d3e6217bd2cc2f0a6949afe4f5cf12abef83b4 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 6 Jul 2024 13:43:42 -0700 Subject: [PATCH 07/14] Stabilize const_cstr_from_ptr (CStr::from_ptr, CStr::count_bytes) --- library/core/src/ffi/c_str.rs | 8 
++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/library/core/src/ffi/c_str.rs b/library/core/src/ffi/c_str.rs index d2a408485d162..f845dfc1fc413 100644 --- a/library/core/src/ffi/c_str.rs +++ b/library/core/src/ffi/c_str.rs @@ -263,8 +263,6 @@ impl CStr { /// ``` /// /// ``` - /// #![feature(const_cstr_from_ptr)] - /// /// use std::ffi::{c_char, CStr}; /// /// const HELLO_PTR: *const c_char = { @@ -280,7 +278,7 @@ impl CStr { #[inline] // inline is necessary for codegen to see strlen. #[must_use] #[stable(feature = "rust1", since = "1.0.0")] - #[rustc_const_unstable(feature = "const_cstr_from_ptr", issue = "113219")] + #[rustc_const_stable(feature = "const_cstr_from_ptr", since = "CURRENT_RUSTC_VERSION")] pub const unsafe fn from_ptr<'a>(ptr: *const c_char) -> &'a CStr { // SAFETY: The caller has provided a pointer that points to a valid C // string with a NUL terminator less than `isize::MAX` from `ptr`. @@ -542,7 +540,7 @@ impl CStr { #[must_use] #[doc(alias("len", "strlen"))] #[stable(feature = "cstr_count_bytes", since = "1.79.0")] - #[rustc_const_unstable(feature = "const_cstr_from_ptr", issue = "113219")] + #[rustc_const_stable(feature = "const_cstr_from_ptr", since = "CURRENT_RUSTC_VERSION")] pub const fn count_bytes(&self) -> usize { self.inner.len() - 1 } @@ -742,6 +740,8 @@ impl AsRef for CStr { /// The pointer must point to a valid buffer that contains a NUL terminator. The NUL must be /// located within `isize::MAX` from `ptr`. #[inline] +#[rustc_const_stable(feature = "const_cstr_from_ptr", since = "CURRENT_RUSTC_VERSION")] +#[rustc_allow_const_fn_unstable(const_eval_select)] const unsafe fn const_strlen(ptr: *const c_char) -> usize { const fn strlen_ct(s: *const c_char) -> usize { let mut len = 0; From 0134bd2e670140e7f3ef98007f811ba2ca0ff882 Mon Sep 17 00:00:00 2001 From: onur-ozkan Date: Wed, 10 Jul 2024 08:35:31 +0300 Subject: [PATCH 08/14] remove unnecessary `git` usages `Config::src` already contains the top-level path, so we don't need to add git overhead just to reach this path. Signed-off-by: onur-ozkan --- src/bootstrap/src/core/config/config.rs | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/src/bootstrap/src/core/config/config.rs b/src/bootstrap/src/core/config/config.rs index 10ac6c93e9a43..84c621a06b542 100644 --- a/src/bootstrap/src/core/config/config.rs +++ b/src/bootstrap/src/core/config/config.rs @@ -2445,14 +2445,6 @@ impl Config { } }; - // Handle running from a directory other than the top level - let top_level = output( - &mut helpers::git(Some(&self.src)).args(["rev-parse", "--show-toplevel"]).command, - ); - let top_level = top_level.trim_end(); - let compiler = format!("{top_level}/compiler/"); - let library = format!("{top_level}/library/"); - // Look for a version to compare to based on the current commit. // Only commits merged by bors will have CI artifacts. let merge_base = output( @@ -2473,7 +2465,9 @@ impl Config { // Warn if there were changes to the compiler or standard library since the ancestor commit. 
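+        // If so, the downloaded CI artifacts would not include those local changes.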
        let has_changes = !t!(helpers::git(Some(&self.src))
-            .args(["diff-index", "--quiet", commit, "--", &compiler, &library])
+            .args(["diff-index", "--quiet", commit])
+            .arg("--")
+            .args([self.src.join("compiler"), self.src.join("library")])
            .command
            .status())
        .success();
@@ -2545,12 +2539,6 @@ impl Config {
        option_name: &str,
        if_unchanged: bool,
    ) -> Option {
-        // Handle running from a directory other than the top level
-        let top_level = output(
-            &mut helpers::git(Some(&self.src)).args(["rev-parse", "--show-toplevel"]).command,
-        );
-        let top_level = top_level.trim_end();
-
        // Look for a version to compare to based on the current commit.
        // Only commits merged by bors will have CI artifacts.
        let merge_base = output(

        let mut git = helpers::git(Some(&self.src));
        git.args(["diff-index", "--quiet", commit, "--"]);

+        // Handle running from a directory other than the top level
+        let top_level = &self.src;

        for path in modified_paths {
-            git.arg(format!("{top_level}/{path}"));
+            git.arg(top_level.join(path));
        }

        let has_changes = !t!(git.command.status()).success();

From f56b2074c688f09be47e6ca82f604eabaa0b8f35 Mon Sep 17 00:00:00 2001
From: lcnr
Date: Thu, 11 Jul 2024 15:39:17 +0200
Subject: [PATCH 09/14] solve -> solve/mod

---
 compiler/rustc_type_ir/src/{solve.rs => solve/mod.rs} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename compiler/rustc_type_ir/src/{solve.rs => solve/mod.rs} (100%)

diff --git a/compiler/rustc_type_ir/src/solve.rs b/compiler/rustc_type_ir/src/solve/mod.rs
similarity index 100%
rename from compiler/rustc_type_ir/src/solve.rs
rename to compiler/rustc_type_ir/src/solve/mod.rs

From 55256c5a183c175a38ee83def7314bf3ad440253 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Wed, 10 Jul 2024 15:44:09 +0000
Subject: [PATCH 10/14] Update dist-riscv64-linux to binutils 2.40

binutils 2.40 is required by LLVM 19, as older versions do not know about
the zmmul extension.

I've had to backport some patches to glibc and gcc as well, as they don't
build with binutils 2.40. Alternatively, we could also switch to glibc 2.35
and gcc 12 (I think). I figured we'd want to avoid the glibc version change,
but if that's fine for riscv I can go with that instead.
--- .../host-x86_64/dist-riscv64-linux/Dockerfile | 1 + .../gcc/8.5.0/0001-divdi3-div-zero.patch | 37 ++++++ .../gcc/8.5.0/0002-hidden-jump-target.patch | 117 ++++++++++++++++++ .../glibc/2.29/0001-hidden-jump-target.patch | 58 +++++++++ .../riscv64-unknown-linux-gnu.defconfig | 4 +- 5 files changed, 216 insertions(+), 1 deletion(-) create mode 100644 src/ci/docker/host-x86_64/dist-riscv64-linux/patches/gcc/8.5.0/0001-divdi3-div-zero.patch create mode 100644 src/ci/docker/host-x86_64/dist-riscv64-linux/patches/gcc/8.5.0/0002-hidden-jump-target.patch create mode 100644 src/ci/docker/host-x86_64/dist-riscv64-linux/patches/glibc/2.29/0001-hidden-jump-target.patch diff --git a/src/ci/docker/host-x86_64/dist-riscv64-linux/Dockerfile b/src/ci/docker/host-x86_64/dist-riscv64-linux/Dockerfile index 426e601f5d34d..4d9334dde8c55 100644 --- a/src/ci/docker/host-x86_64/dist-riscv64-linux/Dockerfile +++ b/src/ci/docker/host-x86_64/dist-riscv64-linux/Dockerfile @@ -11,6 +11,7 @@ RUN sh /scripts/rustbuild-setup.sh WORKDIR /tmp COPY scripts/crosstool-ng-build.sh /scripts/ +COPY host-x86_64/dist-riscv64-linux/patches/ /tmp/patches/ COPY host-x86_64/dist-riscv64-linux/riscv64-unknown-linux-gnu.defconfig /tmp/crosstool.defconfig RUN /scripts/crosstool-ng-build.sh diff --git a/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/gcc/8.5.0/0001-divdi3-div-zero.patch b/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/gcc/8.5.0/0001-divdi3-div-zero.patch new file mode 100644 index 0000000000000..f688eaf8029ec --- /dev/null +++ b/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/gcc/8.5.0/0001-divdi3-div-zero.patch @@ -0,0 +1,37 @@ +From 4013baf99c38f7bca06a51f8301e8fb195ccfa33 Mon Sep 17 00:00:00 2001 +From: Jim Wilson +Date: Tue, 2 Jun 2020 11:19:39 -0700 +Subject: [PATCH] RISC-V: Make __divdi3 handle div by zero same as hardware. + +The ISA manual specifies that divide by zero always returns -1 as the result. +We were failing to do that when the dividend was negative. + +Original patch from Virginie Moser. + + libgcc/ + * config/riscv/div.S (__divdi3): For negative arguments, change bgez + to bgtz. +--- + libgcc/config/riscv/div.S | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/libgcc/config/riscv/div.S b/libgcc/config/riscv/div.S +index 151f8e273ac77..17234324c1e41 100644 +--- a/libgcc/config/riscv/div.S ++++ b/libgcc/config/riscv/div.S +@@ -107,10 +107,12 @@ FUNC_END (__umoddi3) + /* Handle negative arguments to __divdi3. */ + .L10: + neg a0, a0 +- bgez a1, .L12 /* Compute __udivdi3(-a0, a1), then negate the result. */ ++ /* Zero is handled as a negative so that the result will not be inverted. */ ++ bgtz a1, .L12 /* Compute __udivdi3(-a0, a1), then negate the result. */ ++ + neg a1, a1 +- j __udivdi3 /* Compute __udivdi3(-a0, -a1). */ +-.L11: /* Compute __udivdi3(a0, -a1), then negate the result. */ ++ j __udivdi3 /* Compute __udivdi3(-a0, -a1). */ ++.L11: /* Compute __udivdi3(a0, -a1), then negate the result. 
*/ + neg a1, a1 + .L12: + move t0, ra diff --git a/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/gcc/8.5.0/0002-hidden-jump-target.patch b/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/gcc/8.5.0/0002-hidden-jump-target.patch new file mode 100644 index 0000000000000..7ae4469428b13 --- /dev/null +++ b/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/gcc/8.5.0/0002-hidden-jump-target.patch @@ -0,0 +1,117 @@ +From 45116f342057b7facecd3d05c2091ce3a77eda59 Mon Sep 17 00:00:00 2001 +From: Nelson Chu +Date: Mon, 29 Nov 2021 04:48:20 -0800 +Subject: [PATCH] RISC-V: jal cannot refer to a default visibility symbol for + shared object. + +This is the original binutils bugzilla report, +https://sourceware.org/bugzilla/show_bug.cgi?id=28509 + +And this is the first version of the proposed binutils patch, +https://sourceware.org/pipermail/binutils/2021-November/118398.html + +After applying the binutils patch, I get the the unexpected error when +building libgcc, + +/scratch/nelsonc/riscv-gnu-toolchain/riscv-gcc/libgcc/config/riscv/div.S:42: +/scratch/nelsonc/build-upstream/rv64gc-linux/build-install/riscv64-unknown-linux-gnu/bin/ld: relocation R_RISCV_JAL against `__udivdi3' which may bind externally can not be used when making a shared object; recompile with -fPIC + +Therefore, this patch add an extra hidden alias symbol for __udivdi3, and +then use HIDDEN_JUMPTARGET to target a non-preemptible symbol instead. +The solution is similar to glibc as follows, +https://sourceware.org/git/?p=glibc.git;a=commit;h=68389203832ab39dd0dbaabbc4059e7fff51c29b + +libgcc/ChangeLog: + + * config/riscv/div.S: Add the hidden alias symbol for __udivdi3, and + then use HIDDEN_JUMPTARGET to target it since it is non-preemptible. + * config/riscv/riscv-asm.h: Added new macros HIDDEN_JUMPTARGET and + HIDDEN_DEF. +--- + libgcc/config/riscv/div.S | 15 ++++++++------- + libgcc/config/riscv/riscv-asm.h | 6 ++++++ + 2 files changed, 14 insertions(+), 7 deletions(-) + +diff --git a/libgcc/config/riscv/div.S b/libgcc/config/riscv/div.S +index c9bd7879c1e36..723c3b82e48c6 100644 +--- a/libgcc/config/riscv/div.S ++++ b/libgcc/config/riscv/div.S +@@ -40,7 +40,7 @@ FUNC_BEGIN (__udivsi3) + sll a0, a0, 32 + sll a1, a1, 32 + move t0, ra +- jal __udivdi3 ++ jal HIDDEN_JUMPTARGET(__udivdi3) + sext.w a0, a0 + jr t0 + FUNC_END (__udivsi3) +@@ -52,7 +52,7 @@ FUNC_BEGIN (__umodsi3) + srl a0, a0, 32 + srl a1, a1, 32 + move t0, ra +- jal __udivdi3 ++ jal HIDDEN_JUMPTARGET(__udivdi3) + sext.w a0, a1 + jr t0 + FUNC_END (__umodsi3) +@@ -95,11 +95,12 @@ FUNC_BEGIN (__udivdi3) + .L5: + ret + FUNC_END (__udivdi3) ++HIDDEN_DEF (__udivdi3) + + FUNC_BEGIN (__umoddi3) + /* Call __udivdi3(a0, a1), then return the remainder, which is in a1. */ + move t0, ra +- jal __udivdi3 ++ jal HIDDEN_JUMPTARGET(__udivdi3) + move a0, a1 + jr t0 + FUNC_END (__umoddi3) +@@ -111,12 +112,12 @@ FUNC_END (__umoddi3) + bgtz a1, .L12 /* Compute __udivdi3(-a0, a1), then negate the result. */ + + neg a1, a1 +- j __udivdi3 /* Compute __udivdi3(-a0, -a1). */ ++ j HIDDEN_JUMPTARGET(__udivdi3) /* Compute __udivdi3(-a0, -a1). */ + .L11: /* Compute __udivdi3(a0, -a1), then negate the result. */ + neg a1, a1 + .L12: + move t0, ra +- jal __udivdi3 ++ jal HIDDEN_JUMPTARGET(__udivdi3) + neg a0, a0 + jr t0 + FUNC_END (__divdi3) +@@ -126,7 +127,7 @@ FUNC_BEGIN (__moddi3) + bltz a1, .L31 + bltz a0, .L32 + .L30: +- jal __udivdi3 /* The dividend is not negative. */ ++ jal HIDDEN_JUMPTARGET(__udivdi3) /* The dividend is not negative. 
*/ + move a0, a1 + jr t0 + .L31: +@@ -134,7 +135,7 @@ FUNC_BEGIN (__moddi3) + bgez a0, .L30 + .L32: + neg a0, a0 +- jal __udivdi3 /* The dividend is hella negative. */ ++ jal HIDDEN_JUMPTARGET(__udivdi3) /* The dividend is hella negative. */ + neg a0, a1 + jr t0 + FUNC_END (__moddi3) +diff --git a/libgcc/config/riscv/riscv-asm.h b/libgcc/config/riscv/riscv-asm.h +index 8550707a4a26a..96dd85b0df2e5 100644 +--- a/libgcc/config/riscv/riscv-asm.h ++++ b/libgcc/config/riscv/riscv-asm.h +@@ -33,3 +33,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + #define FUNC_ALIAS(X,Y) \ + .globl X; \ + X = Y ++ ++#define CONCAT1(a, b) CONCAT2(a, b) ++#define CONCAT2(a, b) a ## b ++#define HIDDEN_JUMPTARGET(X) CONCAT1(__hidden_, X) ++#define HIDDEN_DEF(X) FUNC_ALIAS(HIDDEN_JUMPTARGET(X), X); \ ++ .hidden HIDDEN_JUMPTARGET(X) diff --git a/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/glibc/2.29/0001-hidden-jump-target.patch b/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/glibc/2.29/0001-hidden-jump-target.patch new file mode 100644 index 0000000000000..d267b961d3472 --- /dev/null +++ b/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/glibc/2.29/0001-hidden-jump-target.patch @@ -0,0 +1,58 @@ +From 68389203832ab39dd0dbaabbc4059e7fff51c29b Mon Sep 17 00:00:00 2001 +From: Fangrui Song +Date: Thu, 28 Oct 2021 11:39:49 -0700 +Subject: [PATCH] riscv: Fix incorrect jal with HIDDEN_JUMPTARGET + +A non-local STV_DEFAULT defined symbol is by default preemptible in a +shared object. j/jal cannot target a preemptible symbol. On other +architectures, such a jump instruction either causes PLT [BZ #18822], or +if short-ranged, sometimes rejected by the linker (but not by GNU ld's +riscv port [ld PR/28509]). + +Use HIDDEN_JUMPTARGET to target a non-preemptible symbol instead. + +With this patch, ld.so and libc.so can be linked with LLD if source +files are compiled/assembled with -mno-relax/-Wa,-mno-relax. + +Acked-by: Palmer Dabbelt +Reviewed-by: Adhemerval Zanella +--- + sysdeps/riscv/setjmp.S | 2 +- + sysdeps/unix/sysv/linux/riscv/setcontext.S | 5 +++-- + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/sysdeps/riscv/setjmp.S b/sysdeps/riscv/setjmp.S +index 0b92016b311..bec7ff80f49 100644 +--- a/sysdeps/riscv/setjmp.S ++++ b/sysdeps/riscv/setjmp.S +@@ -21,7 +21,7 @@ + + ENTRY (_setjmp) + li a1, 0 +- j __sigsetjmp ++ j HIDDEN_JUMPTARGET (__sigsetjmp) + END (_setjmp) + ENTRY (setjmp) + li a1, 1 +diff --git a/sysdeps/unix/sysv/linux/riscv/setcontext.S b/sysdeps/unix/sysv/linux/riscv/setcontext.S +index 9510518750a..e44a68aad47 100644 +--- a/sysdeps/unix/sysv/linux/riscv/setcontext.S ++++ b/sysdeps/unix/sysv/linux/riscv/setcontext.S +@@ -95,6 +95,7 @@ LEAF (__setcontext) + 99: j __syscall_error + + END (__setcontext) ++libc_hidden_def (__setcontext) + weak_alias (__setcontext, setcontext) + + LEAF (__start_context) +@@ -108,7 +109,7 @@ LEAF (__start_context) + /* Invoke subsequent context if present, else exit(0). 
*/ + mv a0, s2 + beqz s2, 1f +- jal __setcontext +-1: j exit ++ jal HIDDEN_JUMPTARGET (__setcontext) ++1: j HIDDEN_JUMPTARGET (exit) + + END (__start_context) diff --git a/src/ci/docker/host-x86_64/dist-riscv64-linux/riscv64-unknown-linux-gnu.defconfig b/src/ci/docker/host-x86_64/dist-riscv64-linux/riscv64-unknown-linux-gnu.defconfig index 470cef1a84e18..f7c93a9d5fc88 100644 --- a/src/ci/docker/host-x86_64/dist-riscv64-linux/riscv64-unknown-linux-gnu.defconfig +++ b/src/ci/docker/host-x86_64/dist-riscv64-linux/riscv64-unknown-linux-gnu.defconfig @@ -3,6 +3,8 @@ CT_EXPERIMENTAL=y CT_PREFIX_DIR="/x-tools/${CT_TARGET}" CT_USE_MIRROR=y CT_MIRROR_BASE_URL="https://ci-mirrors.rust-lang.org/rustc" +CT_PATCH_BUNDLED_LOCAL=y +CT_LOCAL_PATCH_DIR="/tmp/patches" CT_ARCH_RISCV=y # CT_DEMULTILIB is not set CT_ARCH_USE_MMU=y @@ -10,7 +12,7 @@ CT_ARCH_64=y CT_ARCH_ARCH="rv64gc" CT_KERNEL_LINUX=y CT_LINUX_V_4_20=y -CT_BINUTILS_V_2_36=y +CT_BINUTILS_V_2_40=y CT_GLIBC_V_2_29=y CT_GCC_V_8=y CT_CC_LANG_CXX=y From ec05c4ea3fd9b6f7978091f3098ac52116e47a20 Mon Sep 17 00:00:00 2001 From: sayantn Date: Sun, 23 Jun 2024 12:12:51 +0530 Subject: [PATCH 11/14] Add the feature gate and target-features --- compiler/rustc_codegen_ssa/src/target_features.rs | 1 + compiler/rustc_feature/src/unstable.rs | 2 ++ compiler/rustc_span/src/symbol.rs | 1 + compiler/rustc_target/src/target_features.rs | 5 +++++ tests/ui/check-cfg/mix.stderr | 2 +- tests/ui/check-cfg/well-known-values.stderr | 2 +- .../feature-gate-x86_amx_intrinsics.rs | 6 ++++++ .../feature-gate-x86_amx_intrinsics.stderr | 13 +++++++++++++ 8 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 tests/ui/feature-gates/feature-gate-x86_amx_intrinsics.rs create mode 100644 tests/ui/feature-gates/feature-gate-x86_amx_intrinsics.stderr diff --git a/compiler/rustc_codegen_ssa/src/target_features.rs b/compiler/rustc_codegen_ssa/src/target_features.rs index bcddfe9fb9cb0..22006c0b4712a 100644 --- a/compiler/rustc_codegen_ssa/src/target_features.rs +++ b/compiler/rustc_codegen_ssa/src/target_features.rs @@ -80,6 +80,7 @@ pub fn from_target_feature( Some(sym::loongarch_target_feature) => rust_features.loongarch_target_feature, Some(sym::lahfsahf_target_feature) => rust_features.lahfsahf_target_feature, Some(sym::prfchw_target_feature) => rust_features.prfchw_target_feature, + Some(sym::x86_amx_intrinsics) => rust_features.x86_amx_intrinsics, Some(name) => bug!("unknown target feature gate {}", name), None => true, }; diff --git a/compiler/rustc_feature/src/unstable.rs b/compiler/rustc_feature/src/unstable.rs index d7d994d95c51e..3f550f658e837 100644 --- a/compiler/rustc_feature/src/unstable.rs +++ b/compiler/rustc_feature/src/unstable.rs @@ -640,6 +640,8 @@ declare_features! ( (unstable, unsized_tuple_coercion, "1.20.0", Some(42877)), /// Allows using the `#[used(linker)]` (or `#[used(compiler)]`) attribute. (unstable, used_with_arg, "1.60.0", Some(93798)), + /// Allows use of x86 `AMX` target-feature attributes and intrinsics + (unstable, x86_amx_intrinsics, "CURRENT_RUSTC_VERSION", Some(126622)), /// Allows `do yeet` expressions (unstable, yeet_expr, "1.62.0", Some(96373)), // !!!! !!!! !!!! !!!! !!!! !!!! !!!! !!!! !!!! !!!! !!!! diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index af56f4e51413d..827b9062d83ab 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -2072,6 +2072,7 @@ symbols! 
{ write_str, write_via_move, writeln_macro, + x86_amx_intrinsics, x87_reg, xer, xmm_reg, diff --git a/compiler/rustc_target/src/target_features.rs b/compiler/rustc_target/src/target_features.rs index 017fd3072fdb7..aec2828181b91 100644 --- a/compiler/rustc_target/src/target_features.rs +++ b/compiler/rustc_target/src/target_features.rs @@ -192,6 +192,11 @@ const X86_ALLOWED_FEATURES: &[(&str, Stability)] = &[ // tidy-alphabetical-start ("adx", Stable), ("aes", Stable), + ("amx-bf16", Unstable(sym::x86_amx_intrinsics)), + ("amx-complex", Unstable(sym::x86_amx_intrinsics)), + ("amx-fp16", Unstable(sym::x86_amx_intrinsics)), + ("amx-int8", Unstable(sym::x86_amx_intrinsics)), + ("amx-tile", Unstable(sym::x86_amx_intrinsics)), ("avx", Stable), ("avx2", Stable), ("avx512bf16", Unstable(sym::avx512_target_feature)), diff --git a/tests/ui/check-cfg/mix.stderr b/tests/ui/check-cfg/mix.stderr index cc63466585a6a..15b0100d7d23b 100644 --- a/tests/ui/check-cfg/mix.stderr +++ b/tests/ui/check-cfg/mix.stderr @@ -251,7 +251,7 @@ warning: unexpected `cfg` condition value: `zebra` LL | cfg!(target_feature = "zebra"); | ^^^^^^^^^^^^^^^^^^^^^^^^ | - = note: expected values for `target_feature` are: `10e60`, `2e3`, `3e3r1`, `3e3r2`, `3e3r3`, `3e7`, `7e10`, `a`, `aclass`, `adx`, `aes`, `altivec`, `alu32`, `atomics`, `avx`, `avx2`, `avx512bf16`, `avx512bitalg`, `avx512bw`, `avx512cd`, `avx512dq`, `avx512f`, `avx512fp16`, `avx512ifma`, `avx512vbmi`, `avx512vbmi2`, `avx512vl`, `avx512vnni`, `avx512vp2intersect`, `avx512vpopcntdq`, `avxifma`, `avxneconvert`, `avxvnni`, `avxvnniint16`, and `avxvnniint8` and 191 more + = note: expected values for `target_feature` are: `10e60`, `2e3`, `3e3r1`, `3e3r2`, `3e3r3`, `3e7`, `7e10`, `a`, `aclass`, `adx`, `aes`, `altivec`, `alu32`, `amx-bf16`, `amx-complex`, `amx-fp16`, `amx-int8`, `amx-tile`, `atomics`, `avx`, `avx2`, `avx512bf16`, `avx512bitalg`, `avx512bw`, `avx512cd`, `avx512dq`, `avx512f`, `avx512fp16`, `avx512ifma`, `avx512vbmi`, `avx512vbmi2`, `avx512vl`, `avx512vnni`, `avx512vp2intersect`, and `avx512vpopcntdq` and 196 more = note: see for more information about checking conditional configuration warning: 27 warnings emitted diff --git a/tests/ui/check-cfg/well-known-values.stderr b/tests/ui/check-cfg/well-known-values.stderr index 8a99ace75d852..c35fb68c839dd 100644 --- a/tests/ui/check-cfg/well-known-values.stderr +++ b/tests/ui/check-cfg/well-known-values.stderr @@ -165,7 +165,7 @@ warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` LL | target_feature = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | - = note: expected values for `target_feature` are: `10e60`, `2e3`, `3e3r1`, `3e3r2`, `3e3r3`, `3e7`, `7e10`, `a`, `aclass`, `adx`, `aes`, `altivec`, `alu32`, `atomics`, `avx`, `avx2`, `avx512bf16`, `avx512bitalg`, `avx512bw`, `avx512cd`, `avx512dq`, `avx512f`, `avx512fp16`, `avx512ifma`, `avx512vbmi`, `avx512vbmi2`, `avx512vl`, `avx512vnni`, `avx512vp2intersect`, `avx512vpopcntdq`, `avxifma`, `avxneconvert`, `avxvnni`, `avxvnniint16`, `avxvnniint8`, `bf16`, `bmi1`, `bmi2`, `bti`, `bulk-memory`, `c`, `cache`, `cmpxchg16b`, `crc`, `crt-static`, `d`, `d32`, `dit`, `doloop`, `dotprod`, `dpb`, `dpb2`, `dsp`, `dsp1e2`, `dspe60`, `e`, `e1`, `e2`, `edsp`, `elrw`, `ermsb`, `exception-handling`, `extended-const`, `f`, `f16c`, `f32mm`, `f64mm`, `fcma`, `fdivdu`, `fhm`, `flagm`, `float1e2`, `float1e3`, `float3e4`, `float7e60`, `floate1`, `fma`, `fp-armv8`, `fp16`, `fp64`, `fpuv2_df`, `fpuv2_sf`, `fpuv3_df`, `fpuv3_hf`, `fpuv3_hi`, `fpuv3_sf`, `frecipe`, 
`frintts`, `fxsr`, `gfni`, `hard-float`, `hard-float-abi`, `hard-tp`, `high-registers`, `hvx`, `hvx-length128b`, `hwdiv`, `i8mm`, `jsconv`, `lahfsahf`, `lasx`, `lbt`, `lor`, `lse`, `lsx`, `lvz`, `lzcnt`, `m`, `mclass`, `movbe`, `mp`, `mp1e2`, `msa`, `mte`, `multivalue`, `mutable-globals`, `neon`, `nontrapping-fptoint`, `nvic`, `paca`, `pacg`, `pan`, `pclmulqdq`, `pmuv3`, `popcnt`, `power10-vector`, `power8-altivec`, `power8-vector`, `power9-altivec`, `power9-vector`, `prfchw`, `rand`, `ras`, `rclass`, `rcpc`, `rcpc2`, `rdm`, `rdrand`, `rdseed`, `reference-types`, `relax`, `relaxed-simd`, `rtm`, `sb`, `sha`, `sha2`, `sha3`, `sign-ext`, `simd128`, `sm4`, `spe`, `ssbs`, `sse`, `sse2`, `sse3`, `sse4.1`, `sse4.2`, `sse4a`, `ssse3`, `sve`, `sve2`, `sve2-aes`, `sve2-bitperm`, `sve2-sha3`, `sve2-sm4`, `tbm`, `thumb-mode`, `thumb2`, `tme`, `trust`, `trustzone`, `ual`, `unaligned-scalar-mem`, `v`, `v5te`, `v6`, `v6k`, `v6t2`, `v7`, `v8`, `v8.1a`, `v8.2a`, `v8.3a`, `v8.4a`, `v8.5a`, `v8.6a`, `v8.7a`, `vaes`, `vdsp2e60f`, `vdspv1`, `vdspv2`, `vfp2`, `vfp3`, `vfp4`, `vh`, `virt`, `virtualization`, `vpclmulqdq`, `vsx`, `xsave`, `xsavec`, `xsaveopt`, `xsaves`, `zba`, `zbb`, `zbc`, `zbkb`, `zbkc`, `zbkx`, `zbs`, `zdinx`, `zfh`, `zfhmin`, `zfinx`, `zhinx`, `zhinxmin`, `zk`, `zkn`, `zknd`, `zkne`, `zknh`, `zkr`, `zks`, `zksed`, `zksh`, and `zkt` + = note: expected values for `target_feature` are: `10e60`, `2e3`, `3e3r1`, `3e3r2`, `3e3r3`, `3e7`, `7e10`, `a`, `aclass`, `adx`, `aes`, `altivec`, `alu32`, `amx-bf16`, `amx-complex`, `amx-fp16`, `amx-int8`, `amx-tile`, `atomics`, `avx`, `avx2`, `avx512bf16`, `avx512bitalg`, `avx512bw`, `avx512cd`, `avx512dq`, `avx512f`, `avx512fp16`, `avx512ifma`, `avx512vbmi`, `avx512vbmi2`, `avx512vl`, `avx512vnni`, `avx512vp2intersect`, `avx512vpopcntdq`, `avxifma`, `avxneconvert`, `avxvnni`, `avxvnniint16`, `avxvnniint8`, `bf16`, `bmi1`, `bmi2`, `bti`, `bulk-memory`, `c`, `cache`, `cmpxchg16b`, `crc`, `crt-static`, `d`, `d32`, `dit`, `doloop`, `dotprod`, `dpb`, `dpb2`, `dsp`, `dsp1e2`, `dspe60`, `e`, `e1`, `e2`, `edsp`, `elrw`, `ermsb`, `exception-handling`, `extended-const`, `f`, `f16c`, `f32mm`, `f64mm`, `fcma`, `fdivdu`, `fhm`, `flagm`, `float1e2`, `float1e3`, `float3e4`, `float7e60`, `floate1`, `fma`, `fp-armv8`, `fp16`, `fp64`, `fpuv2_df`, `fpuv2_sf`, `fpuv3_df`, `fpuv3_hf`, `fpuv3_hi`, `fpuv3_sf`, `frecipe`, `frintts`, `fxsr`, `gfni`, `hard-float`, `hard-float-abi`, `hard-tp`, `high-registers`, `hvx`, `hvx-length128b`, `hwdiv`, `i8mm`, `jsconv`, `lahfsahf`, `lasx`, `lbt`, `lor`, `lse`, `lsx`, `lvz`, `lzcnt`, `m`, `mclass`, `movbe`, `mp`, `mp1e2`, `msa`, `mte`, `multivalue`, `mutable-globals`, `neon`, `nontrapping-fptoint`, `nvic`, `paca`, `pacg`, `pan`, `pclmulqdq`, `pmuv3`, `popcnt`, `power10-vector`, `power8-altivec`, `power8-vector`, `power9-altivec`, `power9-vector`, `prfchw`, `rand`, `ras`, `rclass`, `rcpc`, `rcpc2`, `rdm`, `rdrand`, `rdseed`, `reference-types`, `relax`, `relaxed-simd`, `rtm`, `sb`, `sha`, `sha2`, `sha3`, `sign-ext`, `simd128`, `sm4`, `spe`, `ssbs`, `sse`, `sse2`, `sse3`, `sse4.1`, `sse4.2`, `sse4a`, `ssse3`, `sve`, `sve2`, `sve2-aes`, `sve2-bitperm`, `sve2-sha3`, `sve2-sm4`, `tbm`, `thumb-mode`, `thumb2`, `tme`, `trust`, `trustzone`, `ual`, `unaligned-scalar-mem`, `v`, `v5te`, `v6`, `v6k`, `v6t2`, `v7`, `v8`, `v8.1a`, `v8.2a`, `v8.3a`, `v8.4a`, `v8.5a`, `v8.6a`, `v8.7a`, `vaes`, `vdsp2e60f`, `vdspv1`, `vdspv2`, `vfp2`, `vfp3`, `vfp4`, `vh`, `virt`, `virtualization`, `vpclmulqdq`, `vsx`, `xsave`, `xsavec`, `xsaveopt`, `xsaves`, `zba`, `zbb`, `zbc`, 
`zbkb`, `zbkc`, `zbkx`, `zbs`, `zdinx`, `zfh`, `zfhmin`, `zfinx`, `zhinx`, `zhinxmin`, `zk`, `zkn`, `zknd`, `zkne`, `zknh`, `zkr`, `zks`, `zksed`, `zksh`, and `zkt` = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` diff --git a/tests/ui/feature-gates/feature-gate-x86_amx_intrinsics.rs b/tests/ui/feature-gates/feature-gate-x86_amx_intrinsics.rs new file mode 100644 index 0000000000000..ecbfc0bce5c56 --- /dev/null +++ b/tests/ui/feature-gates/feature-gate-x86_amx_intrinsics.rs @@ -0,0 +1,6 @@ +//@ only-x86_64 +#[target_feature(enable = "amx-tile")] +//~^ ERROR: currently unstable +unsafe fn foo() {} + +fn main() {} diff --git a/tests/ui/feature-gates/feature-gate-x86_amx_intrinsics.stderr b/tests/ui/feature-gates/feature-gate-x86_amx_intrinsics.stderr new file mode 100644 index 0000000000000..58d577a37902c --- /dev/null +++ b/tests/ui/feature-gates/feature-gate-x86_amx_intrinsics.stderr @@ -0,0 +1,13 @@ +error[E0658]: the target feature `amx-tile` is currently unstable + --> $DIR/feature-gate-x86_amx_intrinsics.rs:2:18 + | +LL | #[target_feature(enable = "amx-tile")] + | ^^^^^^^^^^^^^^^^^^^ + | + = note: see issue #126622 for more information + = help: add `#![feature(x86_amx_intrinsics)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0658`. From 7f1518bddd76ad8e3a743278fd983c35d11b2411 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Thu, 11 Jul 2024 20:12:43 -0700 Subject: [PATCH 12/14] Add instability attribute on private const_strlen function A `rustc_const_stable` attribute by itself has nonintuitive purpose when placed in a public module. Separately, it would probably be okay to rename `const_strlen` to just `strlen` to make it more clear this is our general-purpose implementation of strlen now, not something specifically for const (avoiding confusion like in PR 127444). --- library/core/src/ffi/c_str.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/library/core/src/ffi/c_str.rs b/library/core/src/ffi/c_str.rs index f845dfc1fc413..dc2a5803a1b20 100644 --- a/library/core/src/ffi/c_str.rs +++ b/library/core/src/ffi/c_str.rs @@ -740,6 +740,7 @@ impl AsRef for CStr { /// The pointer must point to a valid buffer that contains a NUL terminator. The NUL must be /// located within `isize::MAX` from `ptr`. #[inline] +#[unstable(feature = "cstr_internals", issue = "none")] #[rustc_const_stable(feature = "const_cstr_from_ptr", since = "CURRENT_RUSTC_VERSION")] #[rustc_allow_const_fn_unstable(const_eval_select)] const unsafe fn const_strlen(ptr: *const c_char) -> usize { From 3f4b9dd463ca37c68dd6b27592e37a9287099406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Fri, 12 Jul 2024 11:27:46 +0200 Subject: [PATCH 13/14] Lower timeout of CI jobs to 4 hours The previous value, 10 hours, is unnecessarily long, since most of our jobs finish within 2.5 hours currently. 
--- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4cf0e5fba5378..8032154a7365b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,7 +65,7 @@ jobs: defaults: run: shell: ${{ contains(matrix.os, 'windows') && 'msys2 {0}' || 'bash' }} - timeout-minutes: 600 + timeout-minutes: 240 env: CI_JOB_NAME: ${{ matrix.image }} CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse From 15f770b1436c2be227c9338040e53d64f211fe63 Mon Sep 17 00:00:00 2001 From: lcnr Date: Thu, 11 Jul 2024 23:13:12 +0200 Subject: [PATCH 14/14] enable fuzzing of `SearchGraph` fully move it into `rustc_type_ir` and make it independent of `Interner`. --- compiler/rustc_middle/src/traits/solve.rs | 4 - .../rustc_middle/src/traits/solve/cache.rs | 121 ---- compiler/rustc_middle/src/ty/context.rs | 29 +- .../src/solve/eval_ctxt/mod.rs | 14 +- .../src/solve/inspect/build.rs | 69 +- .../src/solve/search_graph.rs | 623 ++---------------- compiler/rustc_query_system/src/cache.rs | 2 +- compiler/rustc_type_ir/src/inherent.rs | 30 +- compiler/rustc_type_ir/src/interner.rs | 48 +- compiler/rustc_type_ir/src/lib.rs | 1 + .../src/search_graph/global_cache.rs | 118 ++++ .../rustc_type_ir/src/search_graph/mod.rs | 605 +++++++++++++++++ .../src/search_graph/validate.rs | 75 +++ 13 files changed, 982 insertions(+), 757 deletions(-) delete mode 100644 compiler/rustc_middle/src/traits/solve/cache.rs create mode 100644 compiler/rustc_type_ir/src/search_graph/global_cache.rs create mode 100644 compiler/rustc_type_ir/src/search_graph/mod.rs create mode 100644 compiler/rustc_type_ir/src/search_graph/validate.rs diff --git a/compiler/rustc_middle/src/traits/solve.rs b/compiler/rustc_middle/src/traits/solve.rs index 7bc4c60f10272..f659bf8125a0e 100644 --- a/compiler/rustc_middle/src/traits/solve.rs +++ b/compiler/rustc_middle/src/traits/solve.rs @@ -8,10 +8,6 @@ use crate::ty::{ self, FallibleTypeFolder, TyCtxt, TypeFoldable, TypeFolder, TypeVisitable, TypeVisitor, }; -mod cache; - -pub use cache::EvaluationCache; - pub type Goal<'tcx, P> = ir::solve::Goal, P>; pub type QueryInput<'tcx, P> = ir::solve::QueryInput, P>; pub type QueryResult<'tcx> = ir::solve::QueryResult>; diff --git a/compiler/rustc_middle/src/traits/solve/cache.rs b/compiler/rustc_middle/src/traits/solve/cache.rs deleted file mode 100644 index 72a8d4eb4050c..0000000000000 --- a/compiler/rustc_middle/src/traits/solve/cache.rs +++ /dev/null @@ -1,121 +0,0 @@ -use super::{inspect, CanonicalInput, QueryResult}; -use crate::ty::TyCtxt; -use rustc_data_structures::fx::{FxHashMap, FxHashSet}; -use rustc_data_structures::sync::Lock; -use rustc_query_system::cache::WithDepNode; -use rustc_query_system::dep_graph::DepNodeIndex; -use rustc_session::Limit; -use rustc_type_ir::solve::CacheData; - -/// The trait solver cache used by `-Znext-solver`. -/// -/// FIXME(@lcnr): link to some official documentation of how -/// this works. -#[derive(Default)] -pub struct EvaluationCache<'tcx> { - map: Lock, CacheEntry<'tcx>>>, -} - -impl<'tcx> rustc_type_ir::inherent::EvaluationCache> for &'tcx EvaluationCache<'tcx> { - /// Insert a final result into the global cache. 
- fn insert( - &self, - tcx: TyCtxt<'tcx>, - key: CanonicalInput<'tcx>, - proof_tree: Option<&'tcx inspect::CanonicalGoalEvaluationStep>>, - additional_depth: usize, - encountered_overflow: bool, - cycle_participants: FxHashSet>, - dep_node: DepNodeIndex, - result: QueryResult<'tcx>, - ) { - let mut map = self.map.borrow_mut(); - let entry = map.entry(key).or_default(); - let data = WithDepNode::new(dep_node, QueryData { result, proof_tree }); - entry.cycle_participants.extend(cycle_participants); - if encountered_overflow { - entry.with_overflow.insert(additional_depth, data); - } else { - entry.success = Some(Success { data, additional_depth }); - } - - if cfg!(debug_assertions) { - drop(map); - let expected = CacheData { result, proof_tree, additional_depth, encountered_overflow }; - let actual = self.get(tcx, key, [], additional_depth); - if !actual.as_ref().is_some_and(|actual| expected == *actual) { - bug!("failed to lookup inserted element for {key:?}: {expected:?} != {actual:?}"); - } - } - } - - /// Try to fetch a cached result, checking the recursion limit - /// and handling root goals of coinductive cycles. - /// - /// If this returns `Some` the cache result can be used. - fn get( - &self, - tcx: TyCtxt<'tcx>, - key: CanonicalInput<'tcx>, - stack_entries: impl IntoIterator>, - available_depth: usize, - ) -> Option>> { - let map = self.map.borrow(); - let entry = map.get(&key)?; - - for stack_entry in stack_entries { - if entry.cycle_participants.contains(&stack_entry) { - return None; - } - } - - if let Some(ref success) = entry.success { - if Limit(available_depth).value_within_limit(success.additional_depth) { - let QueryData { result, proof_tree } = success.data.get(tcx); - return Some(CacheData { - result, - proof_tree, - additional_depth: success.additional_depth, - encountered_overflow: false, - }); - } - } - - entry.with_overflow.get(&available_depth).map(|e| { - let QueryData { result, proof_tree } = e.get(tcx); - CacheData { - result, - proof_tree, - additional_depth: available_depth, - encountered_overflow: true, - } - }) - } -} - -struct Success<'tcx> { - data: WithDepNode>, - additional_depth: usize, -} - -#[derive(Clone, Copy)] -pub struct QueryData<'tcx> { - pub result: QueryResult<'tcx>, - pub proof_tree: Option<&'tcx inspect::CanonicalGoalEvaluationStep>>, -} - -/// The cache entry for a goal `CanonicalInput`. -/// -/// This contains results whose computation never hit the -/// recursion limit in `success`, and all results which hit -/// the recursion limit in `with_overflow`. -#[derive(Default)] -struct CacheEntry<'tcx> { - success: Option>, - /// We have to be careful when caching roots of cycles. - /// - /// See the doc comment of `StackEntry::cycle_participants` for more - /// details. 
- cycle_participants: FxHashSet>, - with_overflow: FxHashMap>>, -} diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs index aee42bfe3aaca..9e24ea485b26e 100644 --- a/compiler/rustc_middle/src/ty/context.rs +++ b/compiler/rustc_middle/src/ty/context.rs @@ -59,6 +59,7 @@ use rustc_hir::lang_items::LangItem; use rustc_hir::{HirId, Node, TraitCandidate}; use rustc_index::IndexVec; use rustc_macros::{HashStable, TyDecodable, TyEncodable}; +use rustc_query_system::cache::WithDepNode; use rustc_query_system::dep_graph::DepNodeIndex; use rustc_query_system::ich::StableHashingContext; use rustc_serialize::opaque::{FileEncodeResult, FileEncoder}; @@ -75,7 +76,7 @@ use rustc_type_ir::fold::TypeFoldable; use rustc_type_ir::lang_items::TraitSolverLangItem; use rustc_type_ir::solve::SolverMode; use rustc_type_ir::TyKind::*; -use rustc_type_ir::{CollectAndApply, Interner, TypeFlags, WithCachedTypeInfo}; +use rustc_type_ir::{search_graph, CollectAndApply, Interner, TypeFlags, WithCachedTypeInfo}; use tracing::{debug, instrument}; use std::assert_matches::assert_matches; @@ -164,12 +165,26 @@ impl<'tcx> Interner for TyCtxt<'tcx> { type Clause = Clause<'tcx>; type Clauses = ty::Clauses<'tcx>; - type EvaluationCache = &'tcx solve::EvaluationCache<'tcx>; + type Tracked = WithDepNode; + fn mk_tracked( + self, + data: T, + dep_node: DepNodeIndex, + ) -> Self::Tracked { + WithDepNode::new(dep_node, data) + } + fn get_tracked(self, tracked: &Self::Tracked) -> T { + tracked.get(self) + } - fn evaluation_cache(self, mode: SolverMode) -> &'tcx solve::EvaluationCache<'tcx> { + fn with_global_cache( + self, + mode: SolverMode, + f: impl FnOnce(&mut search_graph::GlobalCache) -> R, + ) -> R { match mode { - SolverMode::Normal => &self.new_solver_evaluation_cache, - SolverMode::Coherence => &self.new_solver_coherence_evaluation_cache, + SolverMode::Normal => f(&mut *self.new_solver_evaluation_cache.lock()), + SolverMode::Coherence => f(&mut *self.new_solver_coherence_evaluation_cache.lock()), } } @@ -1283,8 +1298,8 @@ pub struct GlobalCtxt<'tcx> { pub evaluation_cache: traits::EvaluationCache<'tcx>, /// Caches the results of goal evaluation in the new solver. - pub new_solver_evaluation_cache: solve::EvaluationCache<'tcx>, - pub new_solver_coherence_evaluation_cache: solve::EvaluationCache<'tcx>, + pub new_solver_evaluation_cache: Lock>>, + pub new_solver_coherence_evaluation_cache: Lock>>, pub canonical_param_env_cache: CanonicalParamEnvCache<'tcx>, diff --git a/compiler/rustc_next_trait_solver/src/solve/eval_ctxt/mod.rs b/compiler/rustc_next_trait_solver/src/solve/eval_ctxt/mod.rs index c90f8e761633b..c23bc8f09ad16 100644 --- a/compiler/rustc_next_trait_solver/src/solve/eval_ctxt/mod.rs +++ b/compiler/rustc_next_trait_solver/src/solve/eval_ctxt/mod.rs @@ -16,9 +16,9 @@ use crate::delegate::SolverDelegate; use crate::solve::inspect::{self, ProofTreeBuilder}; use crate::solve::search_graph::SearchGraph; use crate::solve::{ - search_graph, CanonicalInput, CanonicalResponse, Certainty, Goal, GoalEvaluationKind, - GoalSource, MaybeCause, NestedNormalizationGoals, NoSolution, PredefinedOpaquesData, - QueryResult, SolverMode, FIXPOINT_STEP_LIMIT, + CanonicalInput, CanonicalResponse, Certainty, Goal, GoalEvaluationKind, GoalSource, MaybeCause, + NestedNormalizationGoals, NoSolution, PredefinedOpaquesData, QueryResult, SolverMode, + FIXPOINT_STEP_LIMIT, }; pub(super) mod canonical; @@ -72,7 +72,7 @@ where /// new placeholders to the caller. 
pub(super) max_input_universe: ty::UniverseIndex, - pub(super) search_graph: &'a mut SearchGraph, + pub(super) search_graph: &'a mut SearchGraph, nested_goals: NestedGoals, @@ -200,7 +200,7 @@ where generate_proof_tree: GenerateProofTree, f: impl FnOnce(&mut EvalCtxt<'_, D>) -> R, ) -> (R, Option>) { - let mut search_graph = search_graph::SearchGraph::new(delegate.solver_mode()); + let mut search_graph = SearchGraph::new(delegate.solver_mode()); let mut ecx = EvalCtxt { delegate, @@ -241,7 +241,7 @@ where /// and registering opaques from the canonicalized input. fn enter_canonical( cx: I, - search_graph: &'a mut search_graph::SearchGraph, + search_graph: &'a mut SearchGraph, canonical_input: CanonicalInput, canonical_goal_evaluation: &mut ProofTreeBuilder, f: impl FnOnce(&mut EvalCtxt<'_, D>, Goal) -> R, @@ -296,7 +296,7 @@ where #[instrument(level = "debug", skip(cx, search_graph, goal_evaluation), ret)] fn evaluate_canonical_goal( cx: I, - search_graph: &'a mut search_graph::SearchGraph, + search_graph: &'a mut SearchGraph, canonical_input: CanonicalInput, goal_evaluation: &mut ProofTreeBuilder, ) -> QueryResult { diff --git a/compiler/rustc_next_trait_solver/src/solve/inspect/build.rs b/compiler/rustc_next_trait_solver/src/solve/inspect/build.rs index b50676e8d5327..3e266ddac71fd 100644 --- a/compiler/rustc_next_trait_solver/src/solve/inspect/build.rs +++ b/compiler/rustc_next_trait_solver/src/solve/inspect/build.rs @@ -8,7 +8,7 @@ use std::marker::PhantomData; use std::mem; use rustc_type_ir::inherent::*; -use rustc_type_ir::{self as ty, Interner}; +use rustc_type_ir::{self as ty, search_graph, Interner}; use crate::delegate::SolverDelegate; use crate::solve::eval_ctxt::canonical; @@ -38,7 +38,7 @@ use crate::solve::{ /// trees. At the end of trait solving `ProofTreeBuilder::finalize` /// is called to recursively convert the whole structure to a /// finished proof tree. 
-pub(in crate::solve) struct ProofTreeBuilder::Interner> +pub(crate) struct ProofTreeBuilder::Interner> where D: SolverDelegate, I: Interner, @@ -321,23 +321,6 @@ impl, I: Interner> ProofTreeBuilder { }) } - pub fn finalize_canonical_goal_evaluation( - &mut self, - cx: I, - ) -> Option { - self.as_mut().map(|this| match this { - DebugSolver::CanonicalGoalEvaluation(evaluation) => { - let final_revision = mem::take(&mut evaluation.final_revision).unwrap(); - let final_revision = - cx.intern_canonical_goal_evaluation_step(final_revision.finalize()); - let kind = WipCanonicalGoalEvaluationKind::Interned { final_revision }; - assert_eq!(evaluation.kind.replace(kind), None); - final_revision - } - _ => unreachable!(), - }) - } - pub fn canonical_goal_evaluation(&mut self, canonical_goal_evaluation: ProofTreeBuilder) { if let Some(this) = self.as_mut() { match (this, *canonical_goal_evaluation.state.unwrap()) { @@ -571,3 +554,51 @@ impl, I: Interner> ProofTreeBuilder { } } } + +impl search_graph::ProofTreeBuilder for ProofTreeBuilder +where + D: SolverDelegate, + I: Interner, +{ + fn try_apply_proof_tree( + &mut self, + proof_tree: Option, + ) -> bool { + if !self.is_noop() { + if let Some(final_revision) = proof_tree { + let kind = WipCanonicalGoalEvaluationKind::Interned { final_revision }; + self.canonical_goal_evaluation_kind(kind); + true + } else { + false + } + } else { + true + } + } + + fn on_provisional_cache_hit(&mut self) { + self.canonical_goal_evaluation_kind(WipCanonicalGoalEvaluationKind::ProvisionalCacheHit); + } + + fn on_cycle_in_stack(&mut self) { + self.canonical_goal_evaluation_kind(WipCanonicalGoalEvaluationKind::CycleInStack); + } + + fn finalize_canonical_goal_evaluation( + &mut self, + tcx: I, + ) -> Option { + self.as_mut().map(|this| match this { + DebugSolver::CanonicalGoalEvaluation(evaluation) => { + let final_revision = mem::take(&mut evaluation.final_revision).unwrap(); + let final_revision = + tcx.intern_canonical_goal_evaluation_step(final_revision.finalize()); + let kind = WipCanonicalGoalEvaluationKind::Interned { final_revision }; + assert_eq!(evaluation.kind.replace(kind), None); + final_revision + } + _ => unreachable!(), + }) + } +} diff --git a/compiler/rustc_next_trait_solver/src/solve/search_graph.rs b/compiler/rustc_next_trait_solver/src/solve/search_graph.rs index 69d52dcad7a59..fe053a506e712 100644 --- a/compiler/rustc_next_trait_solver/src/solve/search_graph.rs +++ b/compiler/rustc_next_trait_solver/src/solve/search_graph.rs @@ -1,599 +1,90 @@ -use std::mem; +use std::marker::PhantomData; -use rustc_index::{Idx, IndexVec}; -use rustc_type_ir::data_structures::{HashMap, HashSet}; use rustc_type_ir::inherent::*; +use rustc_type_ir::search_graph::{self, CycleKind, UsageKind}; +use rustc_type_ir::solve::{CanonicalInput, Certainty, QueryResult}; use rustc_type_ir::Interner; -use tracing::debug; +use super::inspect::{self, ProofTreeBuilder}; +use super::FIXPOINT_STEP_LIMIT; use crate::delegate::SolverDelegate; -use crate::solve::inspect::{self, ProofTreeBuilder}; -use crate::solve::{ - CacheData, CanonicalInput, Certainty, QueryResult, SolverMode, FIXPOINT_STEP_LIMIT, -}; -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -pub struct SolverLimit(usize); - -rustc_index::newtype_index! { - #[orderable] - #[gate_rustc_only] - pub struct StackDepth {} -} - -bitflags::bitflags! { - /// Whether and how this goal has been used as the root of a - /// cycle. We track the kind of cycle as we're otherwise forced - /// to always rerun at least once. 
- #[derive(Debug, Clone, Copy, PartialEq, Eq)] - struct HasBeenUsed: u8 { - const INDUCTIVE_CYCLE = 1 << 0; - const COINDUCTIVE_CYCLE = 1 << 1; - } -} - -#[derive(derivative::Derivative)] -#[derivative(Debug(bound = ""))] -struct StackEntry { - input: CanonicalInput, - - available_depth: SolverLimit, - - /// The maximum depth reached by this stack entry, only up-to date - /// for the top of the stack and lazily updated for the rest. - reached_depth: StackDepth, - - /// Whether this entry is a non-root cycle participant. - /// - /// We must not move the result of non-root cycle participants to the - /// global cache. We store the highest stack depth of a head of a cycle - /// this goal is involved in. This necessary to soundly cache its - /// provisional result. - non_root_cycle_participant: Option, - - encountered_overflow: bool, - - has_been_used: HasBeenUsed, - - /// We put only the root goal of a coinductive cycle into the global cache. - /// - /// If we were to use that result when later trying to prove another cycle - /// participant, we can end up with unstable query results. - /// - /// See tests/ui/next-solver/coinduction/incompleteness-unstable-result.rs for - /// an example of where this is needed. - /// - /// There can be multiple roots on the same stack, so we need to track - /// cycle participants per root: - /// ```plain - /// A :- B - /// B :- A, C - /// C :- D - /// D :- C - /// ``` - nested_goals: HashSet>, - /// Starts out as `None` and gets set when rerunning this - /// goal in case we encounter a cycle. - provisional_result: Option>, -} - -/// The provisional result for a goal which is not on the stack. -#[derive(Debug)] -struct DetachedEntry { - /// The head of the smallest non-trivial cycle involving this entry. - /// - /// Given the following rules, when proving `A` the head for - /// the provisional entry of `C` would be `B`. - /// ```plain - /// A :- B - /// B :- C - /// C :- A + B + C - /// ``` - head: StackDepth, - result: QueryResult, -} - -/// Stores the stack depth of a currently evaluated goal *and* already -/// computed results for goals which depend on other goals still on the stack. -/// -/// The provisional result may depend on whether the stack above it is inductive -/// or coinductive. Because of this, we store separate provisional results for -/// each case. If an provisional entry is not applicable, it may be the case -/// that we already have provisional result while computing a goal. In this case -/// we prefer the provisional result to potentially avoid fixpoint iterations. -/// See tests/ui/traits/next-solver/cycles/mixed-cycles-2.rs for an example. -/// -/// The provisional cache can theoretically result in changes to the observable behavior, -/// see tests/ui/traits/next-solver/cycles/provisional-cache-impacts-behavior.rs. -#[derive(derivative::Derivative)] -#[derivative(Default(bound = ""))] -struct ProvisionalCacheEntry { - stack_depth: Option, - with_inductive_stack: Option>, - with_coinductive_stack: Option>, -} - -impl ProvisionalCacheEntry { - fn is_empty(&self) -> bool { - self.stack_depth.is_none() - && self.with_inductive_stack.is_none() - && self.with_coinductive_stack.is_none() - } +/// This type is never constructed. We only use it to implement `search_graph::Delegate` +/// for all types which impl `SolverDelegate` and doing it directly fails in coherence. 
+pub(super) struct SearchGraphDelegate { + _marker: PhantomData, } +pub(super) type SearchGraph = search_graph::SearchGraph>; +impl search_graph::Delegate for SearchGraphDelegate +where + D: SolverDelegate, + I: Interner, +{ + type Cx = D::Interner; -pub(super) struct SearchGraph { - mode: SolverMode, - /// The stack of goals currently being computed. - /// - /// An element is *deeper* in the stack if its index is *lower*. - stack: IndexVec>, - provisional_cache: HashMap, ProvisionalCacheEntry>, -} + const FIXPOINT_STEP_LIMIT: usize = FIXPOINT_STEP_LIMIT; -impl SearchGraph { - pub(super) fn new(mode: SolverMode) -> SearchGraph { - Self { mode, stack: Default::default(), provisional_cache: Default::default() } - } + type ProofTreeBuilder = ProofTreeBuilder; - pub(super) fn solver_mode(&self) -> SolverMode { - self.mode + fn recursion_limit(cx: I) -> usize { + cx.recursion_limit() } - fn update_parent_goal(&mut self, reached_depth: StackDepth, encountered_overflow: bool) { - if let Some(parent) = self.stack.raw.last_mut() { - parent.reached_depth = parent.reached_depth.max(reached_depth); - parent.encountered_overflow |= encountered_overflow; - } - } - - pub(super) fn is_empty(&self) -> bool { - self.stack.is_empty() - } - - /// Returns the remaining depth allowed for nested goals. - /// - /// This is generally simply one less than the current depth. - /// However, if we encountered overflow, we significantly reduce - /// the remaining depth of all nested goals to prevent hangs - /// in case there is exponential blowup. - fn allowed_depth_for_nested( + fn initial_provisional_result( cx: I, - stack: &IndexVec>, - ) -> Option { - if let Some(last) = stack.raw.last() { - if last.available_depth.0 == 0 { - return None; - } - - Some(if last.encountered_overflow { - SolverLimit(last.available_depth.0 / 4) - } else { - SolverLimit(last.available_depth.0 - 1) - }) - } else { - Some(SolverLimit(cx.recursion_limit())) - } - } - - fn stack_coinductive_from( - cx: I, - stack: &IndexVec>, - head: StackDepth, - ) -> bool { - stack.raw[head.index()..] - .iter() - .all(|entry| entry.input.value.goal.predicate.is_coinductive(cx)) - } - - // When encountering a solver cycle, the result of the current goal - // depends on goals lower on the stack. - // - // We have to therefore be careful when caching goals. Only the final result - // of the cycle root, i.e. the lowest goal on the stack involved in this cycle, - // is moved to the global cache while all others are stored in a provisional cache. - // - // We update both the head of this cycle to rerun its evaluation until - // we reach a fixpoint and all other cycle participants to make sure that - // their result does not get moved to the global cache. - fn tag_cycle_participants( - stack: &mut IndexVec>, - usage_kind: HasBeenUsed, - head: StackDepth, - ) { - stack[head].has_been_used |= usage_kind; - debug_assert!(!stack[head].has_been_used.is_empty()); - - // The current root of these cycles. Note that this may not be the final - // root in case a later goal depends on a goal higher up the stack. 
- let mut current_root = head; - while let Some(parent) = stack[current_root].non_root_cycle_participant { - current_root = parent; - debug_assert!(!stack[current_root].has_been_used.is_empty()); - } - - let (stack, cycle_participants) = stack.raw.split_at_mut(head.index() + 1); - let current_cycle_root = &mut stack[current_root.as_usize()]; - for entry in cycle_participants { - entry.non_root_cycle_participant = entry.non_root_cycle_participant.max(Some(head)); - current_cycle_root.nested_goals.insert(entry.input); - current_cycle_root.nested_goals.extend(mem::take(&mut entry.nested_goals)); + kind: CycleKind, + input: CanonicalInput, + ) -> QueryResult { + match kind { + CycleKind::Coinductive => response_no_constraints(cx, input, Certainty::Yes), + CycleKind::Inductive => response_no_constraints(cx, input, Certainty::overflow(false)), } } - fn clear_dependent_provisional_results( - provisional_cache: &mut HashMap, ProvisionalCacheEntry>, - head: StackDepth, - ) { - #[allow(rustc::potential_query_instability)] - provisional_cache.retain(|_, entry| { - if entry.with_coinductive_stack.as_ref().is_some_and(|p| p.head == head) { - entry.with_coinductive_stack.take(); - } - if entry.with_inductive_stack.as_ref().is_some_and(|p| p.head == head) { - entry.with_inductive_stack.take(); - } - !entry.is_empty() - }); - } - - /// The trait solver behavior is different for coherence - /// so we use a separate cache. Alternatively we could use - /// a single cache and share it between coherence and ordinary - /// trait solving. - pub(super) fn global_cache(&self, cx: I) -> I::EvaluationCache { - cx.evaluation_cache(self.mode) - } - - /// Probably the most involved method of the whole solver. - /// - /// Given some goal which is proven via the `prove_goal` closure, this - /// handles caching, overflow, and coinductive cycles. - pub(super) fn with_new_goal>( - &mut self, + fn reached_fixpoint( cx: I, + kind: UsageKind, input: CanonicalInput, - inspect: &mut ProofTreeBuilder, - mut prove_goal: impl FnMut(&mut Self, &mut ProofTreeBuilder) -> QueryResult, - ) -> QueryResult { - self.check_invariants(); - // Check for overflow. - let Some(available_depth) = Self::allowed_depth_for_nested(cx, &self.stack) else { - if let Some(last) = self.stack.raw.last_mut() { - last.encountered_overflow = true; - } - - inspect - .canonical_goal_evaluation_kind(inspect::WipCanonicalGoalEvaluationKind::Overflow); - return Self::response_no_constraints(cx, input, Certainty::overflow(true)); - }; - - if let Some(result) = self.lookup_global_cache(cx, input, available_depth, inspect) { - debug!("global cache hit"); - return result; - } - - // Check whether the goal is in the provisional cache. - // The provisional result may rely on the path to its cycle roots, - // so we have to check the path of the current goal matches that of - // the cache entry. - let cache_entry = self.provisional_cache.entry(input).or_default(); - if let Some(entry) = cache_entry - .with_coinductive_stack - .as_ref() - .filter(|p| Self::stack_coinductive_from(cx, &self.stack, p.head)) - .or_else(|| { - cache_entry - .with_inductive_stack - .as_ref() - .filter(|p| !Self::stack_coinductive_from(cx, &self.stack, p.head)) - }) - { - debug!("provisional cache hit"); - // We have a nested goal which is already in the provisional cache, use - // its result. We do not provide any usage kind as that should have been - // already set correctly while computing the cache entry. 
- inspect.canonical_goal_evaluation_kind( - inspect::WipCanonicalGoalEvaluationKind::ProvisionalCacheHit, - ); - Self::tag_cycle_participants(&mut self.stack, HasBeenUsed::empty(), entry.head); - return entry.result; - } else if let Some(stack_depth) = cache_entry.stack_depth { - debug!("encountered cycle with depth {stack_depth:?}"); - // We have a nested goal which directly relies on a goal deeper in the stack. - // - // We start by tagging all cycle participants, as that's necessary for caching. - // - // Finally we can return either the provisional response or the initial response - // in case we're in the first fixpoint iteration for this goal. - inspect.canonical_goal_evaluation_kind( - inspect::WipCanonicalGoalEvaluationKind::CycleInStack, - ); - let is_coinductive_cycle = Self::stack_coinductive_from(cx, &self.stack, stack_depth); - let usage_kind = if is_coinductive_cycle { - HasBeenUsed::COINDUCTIVE_CYCLE - } else { - HasBeenUsed::INDUCTIVE_CYCLE - }; - Self::tag_cycle_participants(&mut self.stack, usage_kind, stack_depth); - - // Return the provisional result or, if we're in the first iteration, - // start with no constraints. - return if let Some(result) = self.stack[stack_depth].provisional_result { - result - } else if is_coinductive_cycle { - Self::response_no_constraints(cx, input, Certainty::Yes) - } else { - Self::response_no_constraints(cx, input, Certainty::overflow(false)) - }; + provisional_result: Option>, + result: QueryResult, + ) -> bool { + if let Some(r) = provisional_result { + r == result } else { - // No entry, we push this goal on the stack and try to prove it. - let depth = self.stack.next_index(); - let entry = StackEntry { - input, - available_depth, - reached_depth: depth, - non_root_cycle_participant: None, - encountered_overflow: false, - has_been_used: HasBeenUsed::empty(), - nested_goals: Default::default(), - provisional_result: None, - }; - assert_eq!(self.stack.push(entry), depth); - cache_entry.stack_depth = Some(depth); - } - - // This is for global caching, so we properly track query dependencies. - // Everything that affects the `result` should be performed within this - // `with_anon_task` closure. If computing this goal depends on something - // not tracked by the cache key and from outside of this anon task, it - // must not be added to the global cache. Notably, this is the case for - // trait solver cycles participants. - let ((final_entry, result), dep_node) = cx.with_cached_task(|| { - for _ in 0..FIXPOINT_STEP_LIMIT { - match self.fixpoint_step_in_task(cx, input, inspect, &mut prove_goal) { - StepResult::Done(final_entry, result) => return (final_entry, result), - StepResult::HasChanged => debug!("fixpoint changed provisional results"), + match kind { + UsageKind::Single(CycleKind::Coinductive) => { + response_no_constraints(cx, input, Certainty::Yes) == result } + UsageKind::Single(CycleKind::Inductive) => { + response_no_constraints(cx, input, Certainty::overflow(false)) == result + } + UsageKind::Mixed => false, } - - debug!("canonical cycle overflow"); - let current_entry = self.stack.pop().unwrap(); - debug_assert!(current_entry.has_been_used.is_empty()); - let result = Self::response_no_constraints(cx, input, Certainty::overflow(false)); - (current_entry, result) - }); - - let proof_tree = inspect.finalize_canonical_goal_evaluation(cx); - - self.update_parent_goal(final_entry.reached_depth, final_entry.encountered_overflow); - - // We're now done with this goal. 
In case this goal is involved in a larger cycle - // do not remove it from the provisional cache and update its provisional result. - // We only add the root of cycles to the global cache. - if let Some(head) = final_entry.non_root_cycle_participant { - let coinductive_stack = Self::stack_coinductive_from(cx, &self.stack, head); - - let entry = self.provisional_cache.get_mut(&input).unwrap(); - entry.stack_depth = None; - if coinductive_stack { - entry.with_coinductive_stack = Some(DetachedEntry { head, result }); - } else { - entry.with_inductive_stack = Some(DetachedEntry { head, result }); - } - } else { - self.provisional_cache.remove(&input); - let reached_depth = final_entry.reached_depth.as_usize() - self.stack.len(); - // When encountering a cycle, both inductive and coinductive, we only - // move the root into the global cache. We also store all other cycle - // participants involved. - // - // We must not use the global cache entry of a root goal if a cycle - // participant is on the stack. This is necessary to prevent unstable - // results. See the comment of `StackEntry::nested_goals` for - // more details. - self.global_cache(cx).insert( - cx, - input, - proof_tree, - reached_depth, - final_entry.encountered_overflow, - final_entry.nested_goals, - dep_node, - result, - ) } - - self.check_invariants(); - - result } - /// Try to fetch a previously computed result from the global cache, - /// making sure to only do so if it would match the result of reevaluating - /// this goal. - fn lookup_global_cache>( - &mut self, + fn on_stack_overflow( cx: I, - input: CanonicalInput, - available_depth: SolverLimit, inspect: &mut ProofTreeBuilder, - ) -> Option> { - let CacheData { result, proof_tree, additional_depth, encountered_overflow } = self - .global_cache(cx) - // FIXME: Awkward `Limit -> usize -> Limit`. - .get(cx, input, self.stack.iter().map(|e| e.input), available_depth.0)?; - - // If we're building a proof tree and the current cache entry does not - // contain a proof tree, we do not use the entry but instead recompute - // the goal. We simply overwrite the existing entry once we're done, - // caching the proof tree. - if !inspect.is_noop() { - if let Some(final_revision) = proof_tree { - let kind = inspect::WipCanonicalGoalEvaluationKind::Interned { final_revision }; - inspect.canonical_goal_evaluation_kind(kind); - } else { - return None; - } - } - - // Adjust the parent goal as if we actually computed this goal. - let reached_depth = self.stack.next_index().plus(additional_depth); - self.update_parent_goal(reached_depth, encountered_overflow); - - Some(result) - } -} - -enum StepResult { - Done(StackEntry, QueryResult), - HasChanged, -} - -impl SearchGraph { - /// When we encounter a coinductive cycle, we have to fetch the - /// result of that cycle while we are still computing it. Because - /// of this we continuously recompute the cycle until the result - /// of the previous iteration is equal to the final result, at which - /// point we are done. - fn fixpoint_step_in_task( - &mut self, - cx: I, input: CanonicalInput, - inspect: &mut ProofTreeBuilder, - prove_goal: &mut F, - ) -> StepResult - where - D: SolverDelegate, - F: FnMut(&mut Self, &mut ProofTreeBuilder) -> QueryResult, - { - let result = prove_goal(self, inspect); - let stack_entry = self.stack.pop().unwrap(); - debug_assert_eq!(stack_entry.input, input); - - // If the current goal is not the root of a cycle, we are done. 
- if stack_entry.has_been_used.is_empty() { - return StepResult::Done(stack_entry, result); - } - - // If it is a cycle head, we have to keep trying to prove it until - // we reach a fixpoint. We need to do so for all cycle heads, - // not only for the root. - // - // See tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.rs - // for an example. - - // Start by clearing all provisional cache entries which depend on this - // the current goal. - Self::clear_dependent_provisional_results( - &mut self.provisional_cache, - self.stack.next_index(), - ); - - // Check whether we reached a fixpoint, either because the final result - // is equal to the provisional result of the previous iteration, or because - // this was only the root of either coinductive or inductive cycles, and the - // final result is equal to the initial response for that case. - let reached_fixpoint = if let Some(r) = stack_entry.provisional_result { - r == result - } else if stack_entry.has_been_used == HasBeenUsed::COINDUCTIVE_CYCLE { - Self::response_no_constraints(cx, input, Certainty::Yes) == result - } else if stack_entry.has_been_used == HasBeenUsed::INDUCTIVE_CYCLE { - Self::response_no_constraints(cx, input, Certainty::overflow(false)) == result - } else { - false - }; - - // If we did not reach a fixpoint, update the provisional result and reevaluate. - if reached_fixpoint { - StepResult::Done(stack_entry, result) - } else { - let depth = self.stack.push(StackEntry { - has_been_used: HasBeenUsed::empty(), - provisional_result: Some(result), - ..stack_entry - }); - debug_assert_eq!(self.provisional_cache[&input].stack_depth, Some(depth)); - StepResult::HasChanged - } - } - - fn response_no_constraints( - cx: I, - goal: CanonicalInput, - certainty: Certainty, ) -> QueryResult { - Ok(super::response_no_constraints_raw(cx, goal.max_universe, goal.variables, certainty)) + inspect.canonical_goal_evaluation_kind(inspect::WipCanonicalGoalEvaluationKind::Overflow); + response_no_constraints(cx, input, Certainty::overflow(true)) } - #[allow(rustc::potential_query_instability)] - fn check_invariants(&self) { - if !cfg!(debug_assertions) { - return; - } - - let SearchGraph { mode: _, stack, provisional_cache } = self; - if stack.is_empty() { - assert!(provisional_cache.is_empty()); - } - - for (depth, entry) in stack.iter_enumerated() { - let StackEntry { - input, - available_depth: _, - reached_depth: _, - non_root_cycle_participant, - encountered_overflow: _, - has_been_used, - ref nested_goals, - provisional_result, - } = *entry; - let cache_entry = provisional_cache.get(&entry.input).unwrap(); - assert_eq!(cache_entry.stack_depth, Some(depth)); - if let Some(head) = non_root_cycle_participant { - assert!(head < depth); - assert!(nested_goals.is_empty()); - assert_ne!(stack[head].has_been_used, HasBeenUsed::empty()); - - let mut current_root = head; - while let Some(parent) = stack[current_root].non_root_cycle_participant { - current_root = parent; - } - assert!(stack[current_root].nested_goals.contains(&input)); - } - - if !nested_goals.is_empty() { - assert!(provisional_result.is_some() || !has_been_used.is_empty()); - for entry in stack.iter().take(depth.as_usize()) { - assert_eq!(nested_goals.get(&entry.input), None); - } - } - } - - for (&input, entry) in &self.provisional_cache { - let ProvisionalCacheEntry { stack_depth, with_coinductive_stack, with_inductive_stack } = - entry; - assert!( - stack_depth.is_some() - || with_coinductive_stack.is_some() - || with_inductive_stack.is_some() - ); - - if let 
&Some(stack_depth) = stack_depth { - assert_eq!(stack[stack_depth].input, input); - } - - let check_detached = |detached_entry: &DetachedEntry| { - let DetachedEntry { head, result: _ } = *detached_entry; - assert_ne!(stack[head].has_been_used, HasBeenUsed::empty()); - }; - - if let Some(with_coinductive_stack) = with_coinductive_stack { - check_detached(with_coinductive_stack); - } + fn on_fixpoint_overflow(cx: I, input: CanonicalInput) -> QueryResult { + response_no_constraints(cx, input, Certainty::overflow(false)) + } - if let Some(with_inductive_stack) = with_inductive_stack { - check_detached(with_inductive_stack); - } - } + fn step_is_coinductive(cx: I, input: CanonicalInput) -> bool { + input.value.goal.predicate.is_coinductive(cx) } } + +fn response_no_constraints( + cx: I, + goal: CanonicalInput, + certainty: Certainty, +) -> QueryResult { + Ok(super::response_no_constraints_raw(cx, goal.max_universe, goal.variables, certainty)) +} diff --git a/compiler/rustc_query_system/src/cache.rs b/compiler/rustc_query_system/src/cache.rs index 6e862db0b2547..d8a5bdba7b8a7 100644 --- a/compiler/rustc_query_system/src/cache.rs +++ b/compiler/rustc_query_system/src/cache.rs @@ -40,7 +40,7 @@ impl Cache { } } -#[derive(Clone, Eq, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq)] pub struct WithDepNode { dep_node: DepNodeIndex, cached_value: T, diff --git a/compiler/rustc_type_ir/src/inherent.rs b/compiler/rustc_type_ir/src/inherent.rs index de86a8536f7af..f05d626b47032 100644 --- a/compiler/rustc_type_ir/src/inherent.rs +++ b/compiler/rustc_type_ir/src/inherent.rs @@ -8,11 +8,10 @@ use std::hash::Hash; use rustc_ast_ir::Mutability; -use crate::data_structures::HashSet; use crate::elaborate::Elaboratable; use crate::fold::{TypeFoldable, TypeSuperFoldable}; use crate::relate::Relate; -use crate::solve::{CacheData, CanonicalInput, QueryResult, Reveal}; +use crate::solve::Reveal; use crate::visit::{Flags, TypeSuperVisitable, TypeVisitable}; use crate::{self as ty, CollectAndApply, Interner, UpcastFrom}; @@ -539,33 +538,6 @@ pub trait Features: Copy { fn associated_const_equality(self) -> bool; } -pub trait EvaluationCache { - /// Insert a final result into the global cache. - fn insert( - &self, - tcx: I, - key: CanonicalInput, - proof_tree: Option, - additional_depth: usize, - encountered_overflow: bool, - cycle_participants: HashSet>, - dep_node: I::DepNodeIndex, - result: QueryResult, - ); - - /// Try to fetch a cached result, checking the recursion limit - /// and handling root goals of coinductive cycles. - /// - /// If this returns `Some` the cache result can be used. 
-    fn get(
-        &self,
-        tcx: I,
-        key: CanonicalInput<I>,
-        stack_entries: impl IntoIterator<Item = CanonicalInput<I>>,
-        available_depth: usize,
-    ) -> Option<QueryResult<I>>;
-}
-
 pub trait DefId<I: Interner>: Copy + Debug + Hash + Eq + TypeFoldable<I> {
     fn is_local(self) -> bool;
 
diff --git a/compiler/rustc_type_ir/src/interner.rs b/compiler/rustc_type_ir/src/interner.rs
index fdd1553d389d2..14ebbb12fe2f0 100644
--- a/compiler/rustc_type_ir/src/interner.rs
+++ b/compiler/rustc_type_ir/src/interner.rs
@@ -10,8 +10,11 @@ use crate::inherent::*;
 use crate::ir_print::IrPrint;
 use crate::lang_items::TraitSolverLangItem;
 use crate::relate::Relate;
+use crate::search_graph;
 use crate::solve::inspect::CanonicalGoalEvaluationStep;
-use crate::solve::{ExternalConstraintsData, PredefinedOpaquesData, SolverMode};
+use crate::solve::{
+    CanonicalInput, ExternalConstraintsData, PredefinedOpaquesData, QueryResult, SolverMode,
+};
 use crate::visit::{Flags, TypeSuperVisitable, TypeVisitable};
 use crate::{self as ty};
 
@@ -86,6 +89,13 @@ pub trait Interner:
     ) -> Self::ExternalConstraints;
 
     type DepNodeIndex;
+    type Tracked<T: Debug + Clone>: Debug;
+    fn mk_tracked<T: Debug + Clone>(
+        self,
+        data: T,
+        dep_node: Self::DepNodeIndex,
+    ) -> Self::Tracked<T>;
+    fn get_tracked<T: Debug + Clone>(self, tracked: &Self::Tracked<T>) -> T;
     fn with_cached_task<T>(self, task: impl FnOnce() -> T) -> (T, Self::DepNodeIndex);
 
     // Kinds of tys
@@ -125,8 +135,11 @@ pub trait Interner:
     type Clause: Clause<Self>;
     type Clauses: Copy + Debug + Hash + Eq + TypeSuperVisitable<Self> + Flags;
 
-    type EvaluationCache: EvaluationCache<Self>;
-    fn evaluation_cache(self, mode: SolverMode) -> Self::EvaluationCache;
+    fn with_global_cache<R>(
+        self,
+        mode: SolverMode,
+        f: impl FnOnce(&mut search_graph::GlobalCache<Self>) -> R,
+    ) -> R;
 
     fn expand_abstract_consts<T: TypeFoldable<Self>>(self, t: T) -> T;
 
@@ -373,3 +386,32 @@ impl<T, R, E> CollectAndApply<T, R> for Result<T, E> {
         })
     }
 }
+
+impl<I: Interner> search_graph::Cx for I {
+    type ProofTree = Option<I::CanonicalGoalEvaluationStepRef>;
+    type Input = CanonicalInput<I>;
+    type Result = QueryResult<I>;
+
+    type DepNodeIndex = I::DepNodeIndex;
+    type Tracked<T: Debug + Clone> = I::Tracked<T>;
+    fn mk_tracked<T: Debug + Clone>(
+        self,
+        data: T,
+        dep_node_index: I::DepNodeIndex,
+    ) -> I::Tracked<T> {
+        I::mk_tracked(self, data, dep_node_index)
+    }
+    fn get_tracked<T: Debug + Clone>(self, tracked: &I::Tracked<T>) -> T {
+        I::get_tracked(self, tracked)
+    }
+    fn with_cached_task<T>(self, task: impl FnOnce() -> T) -> (T, I::DepNodeIndex) {
+        I::with_cached_task(self, task)
+    }
+    fn with_global_cache<R>(
+        self,
+        mode: SolverMode,
+        f: impl FnOnce(&mut search_graph::GlobalCache<Self>) -> R,
+    ) -> R {
+        I::with_global_cache(self, mode, f)
+    }
+}
diff --git a/compiler/rustc_type_ir/src/lib.rs b/compiler/rustc_type_ir/src/lib.rs
index b14a65fc77958..37ee66fa222ae 100644
--- a/compiler/rustc_type_ir/src/lib.rs
+++ b/compiler/rustc_type_ir/src/lib.rs
@@ -30,6 +30,7 @@ pub mod lang_items;
 pub mod lift;
 pub mod outlives;
 pub mod relate;
+pub mod search_graph;
 pub mod solve;
 
 // These modules are not `pub` since they are glob-imported.
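The blanket impl above is the only in-tree implementation of the new `search_graph::Cx` trait, but nothing in the search graph requires an interner. As a rough illustration of why `Cx` is kept separate, for example to drive the search graph from a fuzzer, a minimal standalone context might look like the sketch below. `TestCx` and all of its choices are hypothetical and not part of this patch; a real implementation would also keep one cache per `SolverMode`.

```rust
use std::cell::RefCell;
use std::fmt::Debug;

use rustc_type_ir::search_graph::{Cx, GlobalCache};
use rustc_type_ir::solve::SolverMode;

/// Hypothetical stand-in context: goals are plain integers,
/// results are booleans, and there is no query system.
#[derive(Debug, Clone, Copy)]
struct TestCx;

impl Cx for TestCx {
    type ProofTree = ();
    type Input = u32;
    type Result = bool;

    // Without a query system there is nothing to track, so tracked
    // data is just the data itself.
    type DepNodeIndex = ();
    type Tracked<T: Debug + Clone> = T;
    fn mk_tracked<T: Debug + Clone>(self, data: T, _: ()) -> T {
        data
    }
    fn get_tracked<T: Debug + Clone>(self, tracked: &T) -> T {
        tracked.clone()
    }
    fn with_cached_task<T>(self, task: impl FnOnce() -> T) -> (T, ()) {
        (task(), ())
    }

    fn with_global_cache<R>(
        self,
        _mode: SolverMode, // a real impl would keep one cache per mode
        f: impl FnOnce(&mut GlobalCache<Self>) -> R,
    ) -> R {
        thread_local! {
            static CACHE: RefCell<GlobalCache<TestCx>> = Default::default();
        }
        CACHE.with(|cache| f(&mut cache.borrow_mut()))
    }
}
```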
diff --git a/compiler/rustc_type_ir/src/search_graph/global_cache.rs b/compiler/rustc_type_ir/src/search_graph/global_cache.rs
new file mode 100644
index 0000000000000..5ccda931f9c5f
--- /dev/null
+++ b/compiler/rustc_type_ir/src/search_graph/global_cache.rs
@@ -0,0 +1,118 @@
+use rustc_index::IndexVec;
+
+use super::{AvailableDepth, Cx, StackDepth, StackEntry};
+use crate::data_structures::{HashMap, HashSet};
+
+#[derive(derivative::Derivative)]
+#[derivative(Debug(bound = ""), Clone(bound = ""), Copy(bound = ""))]
+struct QueryData<X: Cx> {
+    result: X::Result,
+    proof_tree: X::ProofTree,
+}
+
+struct Success<X: Cx> {
+    data: X::Tracked<QueryData<X>>,
+    additional_depth: usize,
+}
+
+/// The cache entry for a given input.
+///
+/// This contains results whose computation never hit the
+/// recursion limit in `success`, and all results which hit
+/// the recursion limit in `with_overflow`.
+#[derive(derivative::Derivative)]
+#[derivative(Default(bound = ""))]
+struct CacheEntry<X: Cx> {
+    success: Option<Success<X>>,
+    /// We have to be careful when caching roots of cycles.
+    ///
+    /// See the doc comment of `StackEntry::nested_goals` for more
+    /// details.
+    nested_goals: HashSet<X::Input>,
+    with_overflow: HashMap<usize, X::Tracked<QueryData<X>>>,
+}
+
+#[derive(derivative::Derivative)]
+#[derivative(Debug(bound = ""))]
+pub(super) struct CacheData<'a, X: Cx> {
+    pub(super) result: X::Result,
+    pub(super) proof_tree: X::ProofTree,
+    pub(super) additional_depth: usize,
+    pub(super) encountered_overflow: bool,
+    // FIXME: This is currently unused, but impacts the design
+    // by requiring a closure for `Cx::with_global_cache`.
+    pub(super) nested_goals: &'a HashSet<X::Input>,
+}
+
+#[derive(derivative::Derivative)]
+#[derivative(Default(bound = ""))]
+pub struct GlobalCache<X: Cx> {
+    map: HashMap<X::Input, CacheEntry<X>>,
+}
+
+impl<X: Cx> GlobalCache<X> {
+    /// Insert a final result into the global cache.
+    pub(super) fn insert(
+        &mut self,
+        cx: X,
+        input: X::Input,
+
+        result: X::Result,
+        proof_tree: X::ProofTree,
+        dep_node: X::DepNodeIndex,
+
+        additional_depth: usize,
+        encountered_overflow: bool,
+        nested_goals: &HashSet<X::Input>,
+    ) {
+        let data = cx.mk_tracked(QueryData { result, proof_tree }, dep_node);
+        let entry = self.map.entry(input).or_default();
+        entry.nested_goals.extend(nested_goals);
+        if encountered_overflow {
+            entry.with_overflow.insert(additional_depth, data);
+        } else {
+            entry.success = Some(Success { data, additional_depth });
+        }
+    }
+
+    /// Try to fetch a cached result, checking the recursion limit
+    /// and handling root goals of coinductive cycles.
+    ///
+    /// If this returns `Some` the cached result can be used.
+    pub(super) fn get<'a>(
+        &'a self,
+        cx: X,
+        input: X::Input,
+        stack: &IndexVec<StackDepth, StackEntry<X>>,
+        available_depth: AvailableDepth,
+    ) -> Option<CacheData<'a, X>> {
+        let entry = self.map.get(&input)?;
+        if stack.iter().any(|e| entry.nested_goals.contains(&e.input)) {
+            return None;
+        }
+
+        if let Some(ref success) = entry.success {
+            if available_depth.cache_entry_is_applicable(success.additional_depth) {
+                let QueryData { result, proof_tree } = cx.get_tracked(&success.data);
+                return Some(CacheData {
+                    result,
+                    proof_tree,
+                    additional_depth: success.additional_depth,
+                    encountered_overflow: false,
+                    nested_goals: &entry.nested_goals,
+                });
+            }
+        }
+
+        entry.with_overflow.get(&available_depth.0).map(|e| {
+            let QueryData { result, proof_tree } = cx.get_tracked(e);
+            CacheData {
+                result,
+                proof_tree,
+                additional_depth: available_depth.0,
+                encountered_overflow: true,
+                nested_goals: &entry.nested_goals,
+            }
+        })
+    }
+}
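To make the split between `success` and `with_overflow` concrete: a successful entry is reusable by any later lookup with at least `additional_depth` remaining depth, while an overflow entry is only returned when the available depth matches the depth it was computed with exactly. A hypothetical module-internal test, reusing the `TestCx` sketched earlier (none of this is part of the patch itself), would observe:

```rust
// Assumes the hypothetical `TestCx` from the earlier sketch is in scope.
#[cfg(test)]
mod tests {
    use rustc_index::IndexVec;

    use super::*;

    #[test]
    fn overflow_entries_require_exact_depth() {
        let cx = TestCx;
        let mut cache = GlobalCache::<TestCx>::default();
        // Nothing is on the stack, so `nested_goals` cannot block the lookup.
        let stack: IndexVec<StackDepth, StackEntry<TestCx>> = IndexVec::default();

        // Cache `42 => true`, computed with overflow at an additional depth of 3.
        cache.insert(cx, 42, true, (), (), 3, true, &HashSet::default());
        assert!(cache.get(cx, 42, &stack, AvailableDepth(3)).is_some());
        // More available depth means we should recompute rather than reuse
        // a result which previously hit the recursion limit.
        assert!(cache.get(cx, 42, &stack, AvailableDepth(4)).is_none());
    }
}
```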
diff --git a/compiler/rustc_type_ir/src/search_graph/mod.rs b/compiler/rustc_type_ir/src/search_graph/mod.rs
new file mode 100644
index 0000000000000..c2204becdfd71
--- /dev/null
+++ b/compiler/rustc_type_ir/src/search_graph/mod.rs
@@ -0,0 +1,605 @@
+use std::fmt::Debug;
+use std::hash::Hash;
+use std::marker::PhantomData;
+use std::mem;
+
+use rustc_index::{Idx, IndexVec};
+use tracing::debug;
+
+use crate::data_structures::{HashMap, HashSet};
+use crate::solve::SolverMode;
+
+mod global_cache;
+use global_cache::CacheData;
+pub use global_cache::GlobalCache;
+mod validate;
+
+/// The search graph does not use `Interner` directly so that it can be
+/// fuzzed without having to stub the rest of the interner. We also don't
+/// make this a supertrait of `Interner` as users of the shared type library
+/// shouldn't have to care about `Input` and `Result`: they are implementation
+/// details of the search graph.
+pub trait Cx: Copy {
+    type ProofTree: Debug + Copy;
+    type Input: Debug + Eq + Hash + Copy;
+    type Result: Debug + Eq + Hash + Copy;
+
+    type DepNodeIndex;
+    type Tracked<T: Debug + Clone>: Debug;
+    fn mk_tracked<T: Debug + Clone>(
+        self,
+        data: T,
+        dep_node_index: Self::DepNodeIndex,
+    ) -> Self::Tracked<T>;
+    fn get_tracked<T: Debug + Clone>(self, tracked: &Self::Tracked<T>) -> T;
+    fn with_cached_task<T>(self, task: impl FnOnce() -> T) -> (T, Self::DepNodeIndex);
+
+    fn with_global_cache<R>(
+        self,
+        mode: SolverMode,
+        f: impl FnOnce(&mut GlobalCache<Self>) -> R,
+    ) -> R;
+}
+
+pub trait ProofTreeBuilder<X: Cx> {
+    fn try_apply_proof_tree(&mut self, proof_tree: X::ProofTree) -> bool;
+    fn on_provisional_cache_hit(&mut self);
+    fn on_cycle_in_stack(&mut self);
+    fn finalize_canonical_goal_evaluation(&mut self, cx: X) -> X::ProofTree;
+}
+
+pub trait Delegate {
+    type Cx: Cx;
+    const FIXPOINT_STEP_LIMIT: usize;
+    type ProofTreeBuilder: ProofTreeBuilder<Self::Cx>;
+
+    fn recursion_limit(cx: Self::Cx) -> usize;
+
+    fn initial_provisional_result(
+        cx: Self::Cx,
+        kind: CycleKind,
+        input: <Self::Cx as Cx>::Input,
+    ) -> <Self::Cx as Cx>::Result;
+    fn reached_fixpoint(
+        cx: Self::Cx,
+        kind: UsageKind,
+        input: <Self::Cx as Cx>::Input,
+        provisional_result: Option<<Self::Cx as Cx>::Result>,
+        result: <Self::Cx as Cx>::Result,
+    ) -> bool;
+    fn on_stack_overflow(
+        cx: Self::Cx,
+        inspect: &mut Self::ProofTreeBuilder,
+        input: <Self::Cx as Cx>::Input,
+    ) -> <Self::Cx as Cx>::Result;
+    fn on_fixpoint_overflow(
+        cx: Self::Cx,
+        input: <Self::Cx as Cx>::Input,
+    ) -> <Self::Cx as Cx>::Result;
+
+    fn step_is_coinductive(cx: Self::Cx, input: <Self::Cx as Cx>::Input) -> bool;
+}
+
+/// In the initial iteration of a cycle, we do not yet have a provisional
+/// result. In this case we return an initial provisional result depending
+/// on the kind of cycle.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CycleKind {
+    Coinductive,
+    Inductive,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum UsageKind {
+    Single(CycleKind),
+    Mixed,
+}
+impl UsageKind {
+    fn merge(self, other: Self) -> Self {
+        match (self, other) {
+            (UsageKind::Single(lhs), UsageKind::Single(rhs)) => {
+                if lhs == rhs {
+                    UsageKind::Single(lhs)
+                } else {
+                    UsageKind::Mixed
+                }
+            }
+            (UsageKind::Mixed, UsageKind::Mixed)
+            | (UsageKind::Mixed, UsageKind::Single(_))
+            | (UsageKind::Single(_), UsageKind::Mixed) => UsageKind::Mixed,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy)]
+struct AvailableDepth(usize);
+impl AvailableDepth {
+    /// Returns the remaining depth allowed for nested goals.
+    ///
+    /// This is generally simply one less than the current depth.
+    /// However, if we encountered overflow, we significantly reduce
+    /// the remaining depth of all nested goals to prevent hangs
+    /// in case there is exponential blowup.
+    fn allowed_depth_for_nested<D: Delegate>(
+        cx: D::Cx,
+        stack: &IndexVec<StackDepth, StackEntry<D::Cx>>,
+    ) -> Option<AvailableDepth> {
+        if let Some(last) = stack.raw.last() {
+            if last.available_depth.0 == 0 {
+                return None;
+            }
+
+            Some(if last.encountered_overflow {
+                AvailableDepth(last.available_depth.0 / 2)
+            } else {
+                AvailableDepth(last.available_depth.0 - 1)
+            })
+        } else {
+            Some(AvailableDepth(D::recursion_limit(cx)))
+        }
+    }
+
+    /// Whether we're allowed to use a global cache entry which required
+    /// the given depth.
+    fn cache_entry_is_applicable(self, additional_depth: usize) -> bool {
+        self.0 >= additional_depth
+    }
+}
+
+rustc_index::newtype_index! {
+    #[orderable]
+    #[gate_rustc_only]
+    pub struct StackDepth {}
+}
+
+#[derive(derivative::Derivative)]
+#[derivative(Debug(bound = ""))]
+struct StackEntry<X: Cx> {
+    input: X::Input,
+
+    available_depth: AvailableDepth,
+
+    /// The maximum depth reached by this stack entry, only up to date
+    /// for the top of the stack and lazily updated for the rest.
+    reached_depth: StackDepth,
+
+    /// Whether this entry is a non-root cycle participant.
+    ///
+    /// We must not move the result of non-root cycle participants to the
+    /// global cache. We store the highest stack depth of a head of a cycle
+    /// this goal is involved in. This is necessary to soundly cache its
+    /// provisional result.
+    non_root_cycle_participant: Option<StackDepth>,
+
+    encountered_overflow: bool,
+
+    has_been_used: Option<UsageKind>,
+
+    /// We put only the root goal of a coinductive cycle into the global cache.
+    ///
+    /// If we were to use that result when later trying to prove another cycle
+    /// participant, we can end up with unstable query results.
+    ///
+    /// See tests/ui/next-solver/coinduction/incompleteness-unstable-result.rs for
+    /// an example of where this is needed.
+    ///
+    /// There can be multiple roots on the same stack, so we need to track
+    /// cycle participants per root:
+    /// ```plain
+    /// A :- B
+    /// B :- A, C
+    /// C :- D
+    /// D :- C
+    /// ```
+    nested_goals: HashSet<X::Input>,
+    /// Starts out as `None` and gets set when rerunning this
+    /// goal in case we encounter a cycle.
+    provisional_result: Option<X::Result>,
+}
+
+/// The provisional result for a goal which is not on the stack.
+#[derive(Debug)]
+struct DetachedEntry<X: Cx> {
+    /// The head of the smallest non-trivial cycle involving this entry.
+    ///
+    /// Given the following rules, when proving `A` the head for
+    /// the provisional entry of `C` would be `B`.
+    /// ```plain
+    /// A :- B
+    /// B :- C
+    /// C :- A + B + C
+    /// ```
+    head: StackDepth,
+    result: X::Result,
+}
+
+/// Stores the stack depth of a currently evaluated goal *and* already
+/// computed results for goals which depend on other goals still on the stack.
+///
+/// The provisional result may depend on whether the stack above it is inductive
+/// or coinductive. Because of this, we store separate provisional results for
+/// each case. If a provisional entry is not applicable, it may be the case
+/// that we already have a provisional result while computing a goal. In this case
+/// we prefer the provisional result to potentially avoid fixpoint iterations.
+/// See tests/ui/traits/next-solver/cycles/mixed-cycles-2.rs for an example.
+///
+/// The provisional cache can theoretically result in changes to the observable behavior,
+/// see tests/ui/traits/next-solver/cycles/provisional-cache-impacts-behavior.rs.
+#[derive(derivative::Derivative)]
+#[derivative(Default(bound = ""))]
+struct ProvisionalCacheEntry<X: Cx> {
+    stack_depth: Option<StackDepth>,
+    with_inductive_stack: Option<DetachedEntry<X>>,
+    with_coinductive_stack: Option<DetachedEntry<X>>,
+}
+
+impl<X: Cx> ProvisionalCacheEntry<X> {
+    fn is_empty(&self) -> bool {
+        self.stack_depth.is_none()
+            && self.with_inductive_stack.is_none()
+            && self.with_coinductive_stack.is_none()
+    }
+}
+
+pub struct SearchGraph<D: Delegate<Cx = X>, X: Cx = <D as Delegate>::Cx> {
+    mode: SolverMode,
+    /// The stack of goals currently being computed.
+    ///
+    /// An element is *deeper* in the stack if its index is *lower*.
+    stack: IndexVec<StackDepth, StackEntry<X>>,
+    provisional_cache: HashMap<X::Input, ProvisionalCacheEntry<X>>,
+
+    _marker: PhantomData<D>,
+}
+
+impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
+    pub fn new(mode: SolverMode) -> SearchGraph<D> {
+        Self {
+            mode,
+            stack: Default::default(),
+            provisional_cache: Default::default(),
+            _marker: PhantomData,
+        }
+    }
+
+    pub fn solver_mode(&self) -> SolverMode {
+        self.mode
+    }
+
+    fn update_parent_goal(&mut self, reached_depth: StackDepth, encountered_overflow: bool) {
+        if let Some(parent) = self.stack.raw.last_mut() {
+            parent.reached_depth = parent.reached_depth.max(reached_depth);
+            parent.encountered_overflow |= encountered_overflow;
+        }
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.stack.is_empty()
+    }
+
+    fn stack_coinductive_from(
+        cx: X,
+        stack: &IndexVec<StackDepth, StackEntry<X>>,
+        head: StackDepth,
+    ) -> bool {
+        stack.raw[head.index()..].iter().all(|entry| D::step_is_coinductive(cx, entry.input))
+    }
+
+    // When encountering a solver cycle, the result of the current goal
+    // depends on goals lower on the stack.
+    //
+    // We therefore have to be careful when caching goals. Only the final result
+    // of the cycle root, i.e. the lowest goal on the stack involved in this cycle,
+    // is moved to the global cache while all others are stored in a provisional cache.
+    //
+    // We update both the head of this cycle to rerun its evaluation until
+    // we reach a fixpoint and all other cycle participants to make sure that
+    // their result does not get moved to the global cache.
+    fn tag_cycle_participants(
+        stack: &mut IndexVec<StackDepth, StackEntry<X>>,
+        usage_kind: Option<UsageKind>,
+        head: StackDepth,
+    ) {
+        if let Some(usage_kind) = usage_kind {
+            stack[head].has_been_used =
+                Some(stack[head].has_been_used.map_or(usage_kind, |prev| prev.merge(usage_kind)));
+        }
+        debug_assert!(stack[head].has_been_used.is_some());
+
+        // The current root of these cycles. Note that this may not be the final
+        // root in case a later goal depends on a goal higher up the stack.
+        let mut current_root = head;
+        while let Some(parent) = stack[current_root].non_root_cycle_participant {
+            current_root = parent;
+            debug_assert!(stack[current_root].has_been_used.is_some());
+        }
+
+        let (stack, cycle_participants) = stack.raw.split_at_mut(head.index() + 1);
+        let current_cycle_root = &mut stack[current_root.as_usize()];
+        for entry in cycle_participants {
+            entry.non_root_cycle_participant = entry.non_root_cycle_participant.max(Some(head));
+            current_cycle_root.nested_goals.insert(entry.input);
+            current_cycle_root.nested_goals.extend(mem::take(&mut entry.nested_goals));
+        }
+    }
+
+    fn clear_dependent_provisional_results(
+        provisional_cache: &mut HashMap<X::Input, ProvisionalCacheEntry<X>>,
+        head: StackDepth,
+    ) {
+        #[allow(rustc::potential_query_instability)]
+        provisional_cache.retain(|_, entry| {
+            if entry.with_coinductive_stack.as_ref().is_some_and(|p| p.head == head) {
+                entry.with_coinductive_stack.take();
+            }
+            if entry.with_inductive_stack.as_ref().is_some_and(|p| p.head == head) {
+                entry.with_inductive_stack.take();
+            }
+            !entry.is_empty()
+        });
+    }
+
+    /// Probably the most involved method of the whole solver.
+    ///
+    /// Given some goal which is proven via the `prove_goal` closure, this
+    /// handles caching, overflow, and coinductive cycles.
+    pub fn with_new_goal(
+        &mut self,
+        cx: X,
+        input: X::Input,
+        inspect: &mut D::ProofTreeBuilder,
+        mut prove_goal: impl FnMut(&mut Self, &mut D::ProofTreeBuilder) -> X::Result,
+    ) -> X::Result {
+        self.check_invariants();
+        // Check for overflow.
+        let Some(available_depth) = AvailableDepth::allowed_depth_for_nested::<D>(cx, &self.stack)
+        else {
+            if let Some(last) = self.stack.raw.last_mut() {
+                last.encountered_overflow = true;
+            }
+
+            debug!("encountered stack overflow");
+            return D::on_stack_overflow(cx, inspect, input);
+        };
+
+        if let Some(result) = self.lookup_global_cache(cx, input, available_depth, inspect) {
+            return result;
+        }
+
+        // Check whether the goal is in the provisional cache.
+        // The provisional result may rely on the path to its cycle roots,
+        // so we have to check that the path of the current goal matches that of
+        // the cache entry.
+        let cache_entry = self.provisional_cache.entry(input).or_default();
+        if let Some(entry) = cache_entry
+            .with_coinductive_stack
+            .as_ref()
+            .filter(|p| Self::stack_coinductive_from(cx, &self.stack, p.head))
+            .or_else(|| {
+                cache_entry
+                    .with_inductive_stack
+                    .as_ref()
+                    .filter(|p| !Self::stack_coinductive_from(cx, &self.stack, p.head))
+            })
+        {
+            debug!("provisional cache hit");
+            // We have a nested goal which is already in the provisional cache, use
+            // its result. We do not provide any usage kind as that should have been
+            // already set correctly while computing the cache entry.
+            inspect.on_provisional_cache_hit();
+            Self::tag_cycle_participants(&mut self.stack, None, entry.head);
+            return entry.result;
+        } else if let Some(stack_depth) = cache_entry.stack_depth {
+            debug!("encountered cycle with depth {stack_depth:?}");
+            // We have a nested goal which directly relies on a goal deeper in the stack.
+            //
+            // We start by tagging all cycle participants, as that's necessary for caching.
+            //
+            // Finally we can return either the provisional response or the initial response
+            // in case we're in the first fixpoint iteration for this goal.
+            inspect.on_cycle_in_stack();
+
+            let is_coinductive_cycle = Self::stack_coinductive_from(cx, &self.stack, stack_depth);
+            let cycle_kind =
+                if is_coinductive_cycle { CycleKind::Coinductive } else { CycleKind::Inductive };
+            Self::tag_cycle_participants(
+                &mut self.stack,
+                Some(UsageKind::Single(cycle_kind)),
+                stack_depth,
+            );
+
+            // Return the provisional result or, if we're in the first iteration,
+            // start with no constraints.
+            return if let Some(result) = self.stack[stack_depth].provisional_result {
+                result
+            } else {
+                D::initial_provisional_result(cx, cycle_kind, input)
+            };
+        } else {
+            // No entry, we push this goal on the stack and try to prove it.
+            let depth = self.stack.next_index();
+            let entry = StackEntry {
+                input,
+                available_depth,
+                reached_depth: depth,
+                non_root_cycle_participant: None,
+                encountered_overflow: false,
+                has_been_used: None,
+                nested_goals: Default::default(),
+                provisional_result: None,
+            };
+            assert_eq!(self.stack.push(entry), depth);
+            cache_entry.stack_depth = Some(depth);
+        };
+
+        // This is for global caching, so we properly track query dependencies.
+        // Everything that affects the `result` should be performed within this
+        // `with_cached_task` closure. If computing this goal depends on something
+        // not tracked by the cache key and from outside of this anon task, it
+        // must not be added to the global cache. Notably, this is the case for
+        // trait solver cycle participants.
+        let ((final_entry, result), dep_node) = cx.with_cached_task(|| {
+            for _ in 0..D::FIXPOINT_STEP_LIMIT {
+                match self.fixpoint_step_in_task(cx, input, inspect, &mut prove_goal) {
+                    StepResult::Done(final_entry, result) => return (final_entry, result),
+                    StepResult::HasChanged => debug!("fixpoint changed provisional results"),
+                }
+            }
+
+            debug!("canonical cycle overflow");
+            let current_entry = self.stack.pop().unwrap();
+            debug_assert!(current_entry.has_been_used.is_none());
+            let result = D::on_fixpoint_overflow(cx, input);
+            (current_entry, result)
+        });
+
+        let proof_tree = inspect.finalize_canonical_goal_evaluation(cx);
+
+        self.update_parent_goal(final_entry.reached_depth, final_entry.encountered_overflow);
+
+        // We're now done with this goal. In case this goal is involved in a larger cycle
+        // we do not remove it from the provisional cache but instead update its provisional
+        // result. We only add the root of cycles to the global cache.
+        if let Some(head) = final_entry.non_root_cycle_participant {
+            let coinductive_stack = Self::stack_coinductive_from(cx, &self.stack, head);
+
+            let entry = self.provisional_cache.get_mut(&input).unwrap();
+            entry.stack_depth = None;
+            if coinductive_stack {
+                entry.with_coinductive_stack = Some(DetachedEntry { head, result });
+            } else {
+                entry.with_inductive_stack = Some(DetachedEntry { head, result });
+            }
+        } else {
+            // When encountering a cycle, both inductive and coinductive, we only
+            // move the root into the global cache. We also store all other cycle
+            // participants involved.
+            //
+            // We must not use the global cache entry of a root goal if a cycle
+            // participant is on the stack. This is necessary to prevent unstable
+            // results. See the comment of `StackEntry::nested_goals` for
+            // more details.
+            self.provisional_cache.remove(&input);
+            let additional_depth = final_entry.reached_depth.as_usize() - self.stack.len();
+            cx.with_global_cache(self.mode, |cache| {
+                cache.insert(
+                    cx,
+                    input,
+                    result,
+                    proof_tree,
+                    dep_node,
+                    additional_depth,
+                    final_entry.encountered_overflow,
+                    &final_entry.nested_goals,
+                )
+            })
+        }
+
+        self.check_invariants();
+
+        result
+    }
+
+    /// Try to fetch a previously computed result from the global cache,
+    /// making sure to only do so if it would match the result of reevaluating
+    /// this goal.
+    fn lookup_global_cache(
+        &mut self,
+        cx: X,
+        input: X::Input,
+        available_depth: AvailableDepth,
+        inspect: &mut D::ProofTreeBuilder,
+    ) -> Option<X::Result> {
+        cx.with_global_cache(self.mode, |cache| {
+            let CacheData {
+                result,
+                proof_tree,
+                additional_depth,
+                encountered_overflow,
+                nested_goals: _, // FIXME: consider nested goals here.
+            } = cache.get(cx, input, &self.stack, available_depth)?;
+
+            // If we're building a proof tree and the current cache entry does not
+            // contain a proof tree, we do not use the entry but instead recompute
+            // the goal. We simply overwrite the existing entry once we're done,
+            // caching the proof tree.
+            if !inspect.try_apply_proof_tree(proof_tree) {
+                return None;
+            }
+
+            // Update the reached depth of the current goal to make sure
+            // its state is the same regardless of whether we've used the
+            // global cache or not.
+            let reached_depth = self.stack.next_index().plus(additional_depth);
+            self.update_parent_goal(reached_depth, encountered_overflow);
+
+            debug!("global cache hit");
+            Some(result)
+        })
+    }
+}
+
+enum StepResult<X: Cx> {
+    Done(StackEntry<X>, X::Result),
+    HasChanged,
+}
+
+impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
+    /// When we encounter a coinductive cycle, we have to fetch the
+    /// result of that cycle while we are still computing it. Because
+    /// of this we continuously recompute the cycle until the result
+    /// of the previous iteration is equal to the final result, at which
+    /// point we are done.
+    fn fixpoint_step_in_task<F>(
+        &mut self,
+        cx: X,
+        input: X::Input,
+        inspect: &mut D::ProofTreeBuilder,
+        prove_goal: &mut F,
+    ) -> StepResult<X>
+    where
+        F: FnMut(&mut Self, &mut D::ProofTreeBuilder) -> X::Result,
+    {
+        let result = prove_goal(self, inspect);
+        let stack_entry = self.stack.pop().unwrap();
+        debug_assert_eq!(stack_entry.input, input);
+
+        // If the current goal is not the root of a cycle, we are done.
+        let Some(usage_kind) = stack_entry.has_been_used else {
+            return StepResult::Done(stack_entry, result);
+        };
+
+        // If it is a cycle head, we have to keep trying to prove it until
+        // we reach a fixpoint. We need to do so for all cycle heads,
+        // not only for the root.
+        //
+        // See tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.rs
+        // for an example.
+
+        // Start by clearing all provisional cache entries which depend on
+        // the current goal.
+        Self::clear_dependent_provisional_results(
+            &mut self.provisional_cache,
+            self.stack.next_index(),
+        );
+
+        // Check whether we reached a fixpoint, either because the final result
+        // is equal to the provisional result of the previous iteration, or because
+        // this was only the root of either coinductive or inductive cycles, and the
+        // final result is equal to the initial response for that case.
+        //
+        // If we did not reach a fixpoint, update the provisional result and reevaluate.
+        if D::reached_fixpoint(cx, usage_kind, input, stack_entry.provisional_result, result) {
+            StepResult::Done(stack_entry, result)
+        } else {
+            let depth = self.stack.push(StackEntry {
+                has_been_used: None,
+                provisional_result: Some(result),
+                ..stack_entry
+            });
+            debug_assert_eq!(self.provisional_cache[&input].stack_depth, Some(depth));
+            StepResult::HasChanged
+        }
+    }
+}
diff --git a/compiler/rustc_type_ir/src/search_graph/validate.rs b/compiler/rustc_type_ir/src/search_graph/validate.rs
new file mode 100644
index 0000000000000..1ae806834ba7d
--- /dev/null
+++ b/compiler/rustc_type_ir/src/search_graph/validate.rs
@@ -0,0 +1,75 @@
+use super::*;
+
+impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
+    #[allow(rustc::potential_query_instability)]
+    pub(super) fn check_invariants(&self) {
+        if !cfg!(debug_assertions) {
+            return;
+        }
+
+        let SearchGraph { mode: _, stack, provisional_cache, _marker } = self;
+        if stack.is_empty() {
+            assert!(provisional_cache.is_empty());
+        }
+
+        for (depth, entry) in stack.iter_enumerated() {
+            let StackEntry {
+                input,
+                available_depth: _,
+                reached_depth: _,
+                non_root_cycle_participant,
+                encountered_overflow: _,
+                has_been_used,
+                ref nested_goals,
+                provisional_result,
+            } = *entry;
+            let cache_entry = provisional_cache.get(&entry.input).unwrap();
+            assert_eq!(cache_entry.stack_depth, Some(depth));
+            if let Some(head) = non_root_cycle_participant {
+                assert!(head < depth);
+                assert!(nested_goals.is_empty());
+                assert_ne!(stack[head].has_been_used, None);
+
+                let mut current_root = head;
+                while let Some(parent) = stack[current_root].non_root_cycle_participant {
+                    current_root = parent;
+                }
+                assert!(stack[current_root].nested_goals.contains(&input));
+            }
+
+            if !nested_goals.is_empty() {
+                assert!(provisional_result.is_some() || has_been_used.is_some());
+                for entry in stack.iter().take(depth.as_usize()) {
+                    assert_eq!(nested_goals.get(&entry.input), None);
+                }
+            }
+        }
+
+        for (&input, entry) in &self.provisional_cache {
+            let ProvisionalCacheEntry { stack_depth, with_coinductive_stack, with_inductive_stack } =
+                entry;
+            assert!(
+                stack_depth.is_some()
+                    || with_coinductive_stack.is_some()
+                    || with_inductive_stack.is_some()
+            );
+
+            if let &Some(stack_depth) = stack_depth {
+                assert_eq!(stack[stack_depth].input, input);
+            }
+
+            let check_detached = |detached_entry: &DetachedEntry<X>| {
+                let DetachedEntry { head, result: _ } = *detached_entry;
+                assert_ne!(stack[head].has_been_used, None);
+            };
+
+            if let Some(with_coinductive_stack) = with_coinductive_stack {
+                check_detached(with_coinductive_stack);
+            }
+
+            if let Some(with_inductive_stack) = with_inductive_stack {
+                check_detached(with_inductive_stack);
+            }
+        }
+    }
+}
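A detail that is easy to miss in `tag_cycle_participants`: repeated usages of a cycle head are combined via `UsageKind::merge`, which saturates to `Mixed` once a head has been used in both inductive and coinductive cycles. In the first iteration, before a provisional result exists, the solver's `reached_fixpoint` impl returns `false` for `UsageKind::Mixed`, so such heads are always rerun at least once. A hypothetical unit test pinning down the merge semantics (not part of this patch):

```rust
#[cfg(test)]
mod usage_kind_tests {
    use super::{CycleKind, UsageKind};

    #[test]
    fn merge_saturates_to_mixed() {
        let coinductive = UsageKind::Single(CycleKind::Coinductive);
        let inductive = UsageKind::Single(CycleKind::Inductive);

        // Repeatedly hitting the same kind of cycle stays `Single`.
        assert_eq!(coinductive.merge(coinductive), coinductive);
        // Hitting both kinds degrades to `Mixed`, and `Mixed` is absorbing.
        assert_eq!(coinductive.merge(inductive), UsageKind::Mixed);
        assert_eq!(UsageKind::Mixed.merge(inductive), UsageKind::Mixed);
    }
}
```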