From 2598e4574e9136690add3cef55fbb8ac7356f3d2 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 5 Apr 2017 15:33:24 -0400 Subject: [PATCH 1/7] Add safe wrapper for atomic_singlethreadfence_* --- src/libcore/sync/atomic.rs | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/libcore/sync/atomic.rs b/src/libcore/sync/atomic.rs index ae47e6fdfa928..948edda832b46 100644 --- a/src/libcore/sync/atomic.rs +++ b/src/libcore/sync/atomic.rs @@ -1572,6 +1572,47 @@ pub fn fence(order: Ordering) { } +/// A compiler memory barrier. +/// +/// `compiler_barrier` does not emit any machine code, but prevents the compiler from re-ordering +/// memory operations across this point. Which reorderings are disallowed is dictated by the given +/// [`Ordering`]. Note that `compiler_barrier` does *not* introduce inter-thread memory +/// synchronization; for that, a [`fence`] is needed. +/// +/// The re-ordering prevented by the different ordering semantics are: +/// +/// - with [`SeqCst`], no re-ordering of reads and writes across this point is allowed. +/// - with [`Release`], preceding reads and writes cannot be moved past subsequent writes. +/// - with [`Acquire`], subsequent reads and writes cannot be moved ahead of preceding reads. +/// - with [`AcqRel`], both of the above rules are enforced. +/// +/// # Panics +/// +/// Panics if `order` is [`Relaxed`]. 
+/// +/// [`fence`]: fn.fence.html +/// [`Ordering`]: enum.Ordering.html +/// [`Acquire`]: enum.Ordering.html#variant.Acquire +/// [`SeqCst`]: enum.Ordering.html#variant.SeqCst +/// [`Release`]: enum.Ordering.html#variant.Release +/// [`AcqRel`]: enum.Ordering.html#variant.AcqRel +/// [`Relaxed`]: enum.Ordering.html#variant.Relaxed +#[inline] +#[unstable(feature = "std_compiler_fences", issue = "41091")] +pub fn compiler_barrier(order: Ordering) { + unsafe { + match order { + Acquire => intrinsics::atomic_singlethreadfence_acq(), + Release => intrinsics::atomic_singlethreadfence_rel(), + AcqRel => intrinsics::atomic_singlethreadfence_acqrel(), + SeqCst => intrinsics::atomic_singlethreadfence(), + Relaxed => panic!("there is no such thing as a relaxed barrier"), + __Nonexhaustive => panic!("invalid memory ordering"), + } + } +} + + #[cfg(target_has_atomic = "8")] #[stable(feature = "atomic_debug", since = "1.3.0")] impl fmt::Debug for AtomicBool { From f6d262a326b3a44954773fed6983215b62fe4862 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 5 Apr 2017 21:39:43 -0400 Subject: [PATCH 2/7] Add unstable book entry --- src/doc/unstable-book/src/SUMMARY.md | 1 + .../unstable-book/src/compiler-barriers.md | 98 +++++++++++++++++++ src/libcore/sync/atomic.rs | 2 +- 3 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 src/doc/unstable-book/src/compiler-barriers.md diff --git a/src/doc/unstable-book/src/SUMMARY.md b/src/doc/unstable-book/src/SUMMARY.md index 292f5a1ec816a..68f31ca75e08e 100644 --- a/src/doc/unstable-book/src/SUMMARY.md +++ b/src/doc/unstable-book/src/SUMMARY.md @@ -37,6 +37,7 @@ - [collections](collections.md) - [collections_range](collections-range.md) - [command_envs](command-envs.md) +- [compiler_barriers](compiler-barriers.md) - [compiler_builtins](compiler-builtins.md) - [compiler_builtins_lib](compiler-builtins-lib.md) - [concat_idents](concat-idents.md) diff --git a/src/doc/unstable-book/src/compiler-barriers.md 
b/src/doc/unstable-book/src/compiler-barriers.md new file mode 100644 index 0000000000000..84190dab32737 --- /dev/null +++ b/src/doc/unstable-book/src/compiler-barriers.md @@ -0,0 +1,98 @@ +# `compiler_barriers` + +The tracking issue for this feature is: [#41092] + +[#41092]: https://github.com/rust-lang/rust/issues/41092 + +------------------------ + +The `compiler_barriers` feature exposes the `compiler_barrier` function +in `std::sync::atomic`. This function is conceptually similar to C++'s +`atomic_signal_fence`, which can currently only be accessed in nightly +Rust using the `atomic_singlethreadfence_*` intrinsic functions in +`core`, or through the mostly equivalent literal assembly: + +```rust +#![feature(asm)] +unsafe { asm!("" ::: "memory" : "volatile") }; +``` + +A `compiler_barrier` restricts the kinds of memory re-ordering the +compiler is allowed to do. Specifically, depending on the given ordering +semantics, the compiler may be disallowed from moving reads or writes +from before or after the call to the other side of the call to +`compiler_barrier`. + +## Examples + +The need to prevent re-ordering of reads and writes often arises when +working with low-level devices. Consider a piece of code that interacts +with an ethernet card with a set of internal registers that are accessed +through an address port register (`a: &mut usize`) and a data port +register (`d: &usize`). To read internal register 5, the following code +might then be used: + +```rust +fn read_fifth(a: &mut usize, d: &usize) -> usize { + *a = 5; + *d +} +``` + +In this case, the compiler is free to re-order these two statements if +it thinks doing so might result in better performance, register use, or +anything else compilers care about. However, in doing so, it would break +the code, as `x` would be set to the value of some other device +register!
+ +By inserting a compiler barrier, we can force the compiler to not +re-arrange these two statements, making the code function correctly +again: + +```rust +#![feature(compiler_barriers)] +use std::sync::atomic; + +fn read_fifth(a: &mut usize, d: &usize) -> usize { + *a = 5; + atomic::compiler_barrier(atomic::Ordering::SeqCst); + *d +} +``` + +Compiler barriers are also useful in code that implements low-level +synchronization primitives. Consider a structure with two different +atomic variables, with a dependency chain between them: + +```rust +use std::sync::atomic; + +fn thread1(x: &atomic::AtomicUsize, y: &atomic::AtomicUsize) { + x.store(1, atomic::Ordering::Release); + let v1 = y.load(atomic::Ordering::Acquire); +} +fn thread2(x: &atomic::AtomicUsize, y: &atomic::AtomicUsize) { + y.store(1, atomic::Ordering::Release); + let v2 = x.load(atomic::Ordering::Acquire); +} +``` + +This code will guarantee that `thread1` sees any writes to `y` made by +`thread2`, and that `thread2` sees any writes to `x`. Intuitively, one +might also expect that if `thread2` sees `v2 == 0`, `thread1` must see +`v1 == 1` (since `thread2`'s store happened before its `load`, and its +load did not see `thread1`'s store). However, the code as written does +*not* guarantee this, because the compiler is allowed to re-order the +store and load within each thread. To enforce this particular behavior, +a call to `compiler_barrier(Ordering::SeqCst)` would need to be inserted +between the `store` and `load` in both functions. + +Compiler barriers with weaker re-ordering semantics (such as +`Ordering::Acquire`) can also be useful, but are beyond the scope of +this text. Curious readers are encouraged to read the Linux kernel's +discussion of [memory barriers][1], as well as C++ references on +[`std::memory_order`][2] and [`atomic_signal_fence`][3]. 
+ +[1]: https://www.kernel.org/doc/Documentation/memory-barriers.txt +[2]: http://en.cppreference.com/w/cpp/atomic/memory_order +[3]: http://www.cplusplus.com/reference/atomic/atomic_signal_fence/ diff --git a/src/libcore/sync/atomic.rs b/src/libcore/sync/atomic.rs index 948edda832b46..8cf1d1d54a5e9 100644 --- a/src/libcore/sync/atomic.rs +++ b/src/libcore/sync/atomic.rs @@ -1598,7 +1598,7 @@ pub fn fence(order: Ordering) { /// [`AcqRel`]: enum.Ordering.html#variant.AcqRel /// [`Relaxed`]: enum.Ordering.html#variant.Relaxed #[inline] -#[unstable(feature = "std_compiler_fences", issue = "41091")] +#[unstable(feature = "compiler_barriers", issue = "41091")] pub fn compiler_barrier(order: Ordering) { unsafe { match order { From 5c6f7fafbdcfe412f0ee836562c3682818604132 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Thu, 6 Apr 2017 03:45:08 -0400 Subject: [PATCH 3/7] Point to tracking issue, not PR --- src/doc/unstable-book/src/compiler-barriers.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/doc/unstable-book/src/compiler-barriers.md b/src/doc/unstable-book/src/compiler-barriers.md index 84190dab32737..5a5c539609c72 100644 --- a/src/doc/unstable-book/src/compiler-barriers.md +++ b/src/doc/unstable-book/src/compiler-barriers.md @@ -1,8 +1,8 @@ # `compiler_barriers` -The tracking issue for this feature is: [#41092] +The tracking issue for this feature is: [#41091] -[#41092]: https://github.com/rust-lang/rust/issues/41092 +[#41091]: https://github.com/rust-lang/rust/issues/41091 ------------------------ From 5202ac57531ca42d26a778d99165f22db3c61632 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Thu, 6 Apr 2017 23:29:16 -0400 Subject: [PATCH 4/7] Correct book examples for hardware re-ordering --- .../unstable-book/src/compiler-barriers.md | 116 ++++++++++-------- 1 file changed, 62 insertions(+), 54 deletions(-) diff --git a/src/doc/unstable-book/src/compiler-barriers.md b/src/doc/unstable-book/src/compiler-barriers.md index 
5a5c539609c72..3108494aa79f0 100644 --- a/src/doc/unstable-book/src/compiler-barriers.md +++ b/src/doc/unstable-book/src/compiler-barriers.md @@ -21,78 +21,86 @@ A `compiler_barrier` restricts the kinds of memory re-ordering the compiler is allowed to do. Specifically, depending on the given ordering semantics, the compiler may be disallowed from moving reads or writes from before or after the call to the other side of the call to -`compiler_barrier`. +`compiler_barrier`. Note that it does **not** prevent the *hardware* +from doing such re-orderings -- for that, the `volatile_*` class of +functions, or full memory fences, need to be used. ## Examples -The need to prevent re-ordering of reads and writes often arises when -working with low-level devices. Consider a piece of code that interacts -with an ethernet card with a set of internal registers that are accessed -through an address port register (`a: &mut usize`) and a data port -register (`d: &usize`). To read internal register 5, the following code -might then be used: +`compiler_barrier` is generally only useful for preventing a thread from +racing *with itself*. That is, if a given thread is executing one piece +of code, and is then interrupted, and starts executing code elsewhere +(while still in the same thread, and conceptually still on the same +core). In traditional programs, this can only occur when a signal +handler is registered. 
Consider the following code: ```rust -fn read_fifth(a: &mut usize, d: &usize) -> usize { - *a = 5; - *d +#use std::sync::atomic::{AtomicBool, AtomicUsize}; +#use std::sync::atomic::{ATOMIC_BOOL_INIT, ATOMIC_USIZE_INIT}; +#use std::sync::atomic::Ordering; +static IMPORTANT_VARIABLE: AtomicUsize = ATOMIC_USIZE_INIT; +static IS_READY: AtomicBool = ATOMIC_BOOL_INIT; + +fn main() { + IMPORTANT_VARIABLE.store(42, Ordering::Relaxed); + IS_READY.store(true, Ordering::Relaxed); +} + +fn signal_handler() { + if IS_READY.load(Ordering::Relaxed) { + assert_eq!(IMPORTANT_VARIABLE.load(Ordering::Relaxed), 42); + } } ``` -In this case, the compiler is free to re-order these two statements if -it thinks doing so might result in better performance, register use, or -anything else compilers care about. However, in doing so, it would break -the code, as `x` would be set to the value of some other device -register! +The way it is currently written, the `assert_eq!` is *not* guaranteed to +succeed, despite everything happening in a single thread. To see why, +remember that the compiler is free to swap the stores to +`IMPORTANT_VARIABLE` and `IS_READY` since they are both +`Ordering::Relaxed`. If it does, and the signal handler is invoked right +after `IS_READY` is updated, then the signal handler will see +`IS_READY=1`, but `IMPORTANT_VARIABLE=0`.
-By inserting a compiler barrier, we can force the compiler to not -re-arrange these two statements, making the code function correctly -again: +Using a `compiler_barrier`, we can remedy this situation: ```rust #![feature(compiler_barriers)] -use std::sync::atomic; - -fn read_fifth(a: &mut usize, d: &usize) -> usize { - *a = 5; - atomic::compiler_barrier(atomic::Ordering::SeqCst); - *d +#use std::sync::atomic::{AtomicBool, AtomicUsize}; +#use std::sync::atomic::{ATOMIC_BOOL_INIT, ATOMIC_USIZE_INIT}; +#use std::sync::atomic::Ordering; +use std::sync::atomic::compiler_barrier; + +static IMPORTANT_VARIABLE: AtomicUsize = ATOMIC_USIZE_INIT; +static IS_READY: AtomicBool = ATOMIC_BOOL_INIT; + +fn main() { + IMPORTANT_VARIABLE.store(42, Ordering::Relaxed); + // prevent earlier writes from being moved beyond this point + compiler_barrier(Ordering::Release); + IS_READY.store(true, Ordering::Relaxed); } -``` - -Compiler barriers are also useful in code that implements low-level -synchronization primitives. Consider a structure with two different -atomic variables, with a dependency chain between them: -```rust -use std::sync::atomic; - -fn thread1(x: &atomic::AtomicUsize, y: &atomic::AtomicUsize) { - x.store(1, atomic::Ordering::Release); - let v1 = y.load(atomic::Ordering::Acquire); -} -fn thread2(x: &atomic::AtomicUsize, y: &atomic::AtomicUsize) { - y.store(1, atomic::Ordering::Release); - let v2 = x.load(atomic::Ordering::Acquire); +fn signal_handler() { + if IS_READY.load(Ordering::Relaxed) { + assert_eq!(IMPORTANT_VARIABLE.load(Ordering::Relaxed), 42); + } } ``` -This code will guarantee that `thread1` sees any writes to `y` made by -`thread2`, and that `thread2` sees any writes to `x`. Intuitively, one -might also expect that if `thread2` sees `v2 == 0`, `thread1` must see -`v1 == 1` (since `thread2`'s store happened before its `load`, and its -load did not see `thread1`'s store). 
However, the code as written does -*not* guarantee this, because the compiler is allowed to re-order the -store and load within each thread. To enforce this particular behavior, -a call to `compiler_barrier(Ordering::SeqCst)` would need to be inserted -between the `store` and `load` in both functions. - -Compiler barriers with weaker re-ordering semantics (such as -`Ordering::Acquire`) can also be useful, but are beyond the scope of -this text. Curious readers are encouraged to read the Linux kernel's -discussion of [memory barriers][1], as well as C++ references on -[`std::memory_order`][2] and [`atomic_signal_fence`][3]. +In more advanced cases (for example, if `IMPORTANT_VARIABLE` was an +`AtomicPtr` that starts as `NULL`), it may also be unsafe for the +compiler to hoist code using `IMPORTANT_VARIABLE` above the +`IS_READY.load`. In that case, a `compiler_barrier(Ordering::Acquire)` +should be placed at the top of the `if` to prevent this optimizations. + +A deeper discussion of compiler barriers with various re-ordering +semantics (such as `Ordering::SeqCst`) is beyond the scope of this text. +Curious readers are encouraged to read the Linux kernel's discussion of +[memory barriers][1], the C++ references on [`std::memory_order`][2] and +[`atomic_signal_fence`][3], and [this StackOverflow answer][4] for +further details. 
[1]: https://www.kernel.org/doc/Documentation/memory-barriers.txt [2]: http://en.cppreference.com/w/cpp/atomic/memory_order [3]: http://www.cplusplus.com/reference/atomic/atomic_signal_fence/ +[4]: http://stackoverflow.com/a/18454971/472927 From 8fc3ab20b01044cfebfcd963ef7b69cdb11381b2 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Fri, 7 Apr 2017 09:11:07 -0400 Subject: [PATCH 5/7] rustdoc needs space after # to ignore --- src/doc/unstable-book/src/compiler-barriers.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/doc/unstable-book/src/compiler-barriers.md b/src/doc/unstable-book/src/compiler-barriers.md index 3108494aa79f0..df0e20eefadbc 100644 --- a/src/doc/unstable-book/src/compiler-barriers.md +++ b/src/doc/unstable-book/src/compiler-barriers.md @@ -35,9 +35,9 @@ core). In traditional programs, this can only occur when a signal handler is registered. Consider the following code: ```rust -#use std::sync::atomic::{AtomicBool, AtomicUsize}; -#use std::sync::atomic::{ATOMIC_BOOL_INIT, ATOMIC_USIZE_INIT}; -#use std::sync::atomic::Ordering; +# use std::sync::atomic::{AtomicBool, AtomicUsize}; +# use std::sync::atomic::{ATOMIC_BOOL_INIT, ATOMIC_USIZE_INIT}; +# use std::sync::atomic::Ordering; static IMPORTANT_VARIABLE: AtomicUsize = ATOMIC_USIZE_INIT; static IS_READY: AtomicBool = ATOMIC_BOOL_INIT; @@ -65,9 +65,9 @@ Using a `compiler_barrier`, we can remedy this situation: ```rust #![feature(compiler_barriers)] -#use std::sync::atomic::{AtomicBool, AtomicUsize}; -#use std::sync::atomic::{ATOMIC_BOOL_INIT, ATOMIC_USIZE_INIT}; -#use std::sync::atomic::Ordering; +# use std::sync::atomic::{AtomicBool, AtomicUsize}; +# use std::sync::atomic::{ATOMIC_BOOL_INIT, ATOMIC_USIZE_INIT}; +# use std::sync::atomic::Ordering; use std::sync::atomic::compiler_barrier; static IMPORTANT_VARIABLE: AtomicUsize = ATOMIC_USIZE_INIT; From e6597e12bc40a28ed4c682e71f7159b9cb2403e7 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Fri, 7 Apr 2017 09:13:06 
-0400 Subject: [PATCH 6/7] Mention interrupts and green threads --- src/doc/unstable-book/src/compiler-barriers.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/doc/unstable-book/src/compiler-barriers.md b/src/doc/unstable-book/src/compiler-barriers.md index df0e20eefadbc..9e137a5c4f878 100644 --- a/src/doc/unstable-book/src/compiler-barriers.md +++ b/src/doc/unstable-book/src/compiler-barriers.md @@ -32,7 +32,12 @@ racing *with itself*. That is, if a given thread is executing one piece of code, and is then interrupted, and starts executing code elsewhere (while still in the same thread, and conceptually still on the same core). In traditional programs, this can only occur when a signal -handler is registered. Consider the following code: +handler is registered. In more low-level code, such situations can also +arise when handling interrupts, when implementing green threads with +pre-emption, etc. + +To give a straightforward example of when a `compiler_barrier` is +necessary, consider the following example: ```rust # use std::sync::atomic::{AtomicBool, AtomicUsize}; From f093d59c31bd2064328e24d0ec76e0d105fc32fc Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Sat, 8 Apr 2017 14:03:17 -0400 Subject: [PATCH 7/7] Address @parched's comments --- src/doc/unstable-book/src/compiler-barriers.md | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/doc/unstable-book/src/compiler-barriers.md b/src/doc/unstable-book/src/compiler-barriers.md index 9e137a5c4f878..827447f0bd510 100644 --- a/src/doc/unstable-book/src/compiler-barriers.md +++ b/src/doc/unstable-book/src/compiler-barriers.md @@ -22,8 +22,9 @@ compiler is allowed to do. Specifically, depending on the given ordering semantics, the compiler may be disallowed from moving reads or writes from before or after the call to the other side of the call to `compiler_barrier`. 
Note that it does **not** prevent the *hardware* -from doing such re-orderings -- for that, the `volatile_*` class of -functions, or full memory fences, need to be used. +from doing such re-ordering. This is not a problem in a single-threaded +execution context, but when other threads may modify memory at the same +time, stronger synchronization primitives are required. ## Examples @@ -92,12 +93,6 @@ fn signal_handler() { } ``` -In more advanced cases (for example, if `IMPORTANT_VARIABLE` was an -`AtomicPtr` that starts as `NULL`), it may also be unsafe for the -compiler to hoist code using `IMPORTANT_VARIABLE` above the -`IS_READY.load`. In that case, a `compiler_barrier(Ordering::Acquire)` -should be placed at the top of the `if` to prevent this optimizations. - A deeper discussion of compiler barriers with various re-ordering semantics (such as `Ordering::SeqCst`) is beyond the scope of this text. Curious readers are encouraged to read the Linux kernel's discussion of