Auto merge of rust-lang#72460 - RalfJung:rollup-28fs06y, r=RalfJung

Rollup of 4 pull requests Successful merges: - rust-lang#71610 (InvalidUndefBytes: Track size of undef region used) - rust-lang#72161 (Replace fcntl-based file lock with flock) - rust-lang#72306 (Break tokens before checking if they are 'probably equal') - rust-lang#72325 (Always generated object code for `#![no_builtins]`) Failed merges: r? @ghost
rust-lang-ci · May 22, 2020 · a9ca1ec · a9ca1ec
2 parents de6060b + 1119421
commit a9ca1ec
Show file tree

Hide file tree

Showing 11 changed files with 252 additions and 54 deletions.
diff --git a/src/librustc_ast/tokenstream.rs b/src/librustc_ast/tokenstream.rs
@@ -21,6 +21,8 @@ use rustc_macros::HashStable_Generic;
 use rustc_span::{Span, DUMMY_SP};
 use smallvec::{smallvec, SmallVec};
 
+use log::debug;
+
 use std::{iter, mem};
 
 /// When the main rust parser encounters a syntax-extension invocation, it
@@ -338,8 +340,71 @@ impl TokenStream {
             true
         }
 
-        let mut t1 = self.trees().filter(semantic_tree);
-        let mut t2 = other.trees().filter(semantic_tree);
+        // When comparing two `TokenStream`s, we ignore the `IsJoint` information.
+        //
+        // However, `rustc_parse::lexer::tokentrees::TokenStreamBuilder` will
+        // use `Token.glue` on adjacent tokens with the proper `IsJoint`.
+        // Since we are ignoring `IsJoint`, a 'glued' token (e.g. `BinOp(Shr)`)
+        // and its 'split'/'unglued' components (e.g. `Gt, Gt`) are equivalent
+        // when determining if two `TokenStream`s are 'probably equal'.
+        //
+        // Therefore, we use `break_two_token_op` to convert all tokens
+        // to the 'unglued' form (if it exists). This ensures that two
+        // `TokenStream`s which differ only in how their tokens are glued
+        // will be considered 'probably equal', which allows us to keep spans.
+        //
+        // This is important when the original `TokenStream` contained
+        // extra spaces (e.g. `f :: < Vec < _ > > ( ) ;`). These extra spaces
+        // will be omitted when we pretty-print, which can cause the original
+        // and reparsed `TokenStream`s to differ in the assignment of `IsJoint`,
+        // leading to some tokens being 'glued' together in one stream but not
+        // the other. See #68489 for more details.
+        fn break_tokens(tree: TokenTree) -> impl Iterator<Item = TokenTree> {
+            // In almost all cases, we should have either zero or one levels
+            // of 'unglueing'. However, in some unusual cases, we may need
+            // to iterate breaking tokens multiple times. For example:
+            // '[BinOpEq(Shr)] => [Gt, Ge] => [Gt, Gt, Eq]'
+            let mut token_trees: SmallVec<[_; 2]>;
+            if let TokenTree::Token(token) = &tree {
+                let mut out = SmallVec::<[_; 2]>::new();
+                out.push(token.clone());
+                // Iterate to fixpoint:
+                // * We start off with `out` containing our initial token, and `temp` empty
+                // * If we are able to break any tokens in `out`, then `temp` will end up
+                //   with at least one more element than `out` and `changed` is set, so we
+                //   will try to break tokens again.
+                // * If we cannot break any tokens in `out`, we are done
+                loop {
+                    let mut temp = SmallVec::<[_; 2]>::new();
+                    let mut changed = false;
+
+                    for token in out.into_iter() {
+                        if let Some((first, second)) = token.kind.break_two_token_op() {
+                            temp.push(Token::new(first, DUMMY_SP));
+                            temp.push(Token::new(second, DUMMY_SP));
+                            changed = true;
+                        } else {
+                            temp.push(token);
+                        }
+                    }
+                    out = temp;
+                    if !changed {
+                        break;
+                    }
+                }
+                token_trees = out.into_iter().map(|t| TokenTree::Token(t)).collect();
+                if token_trees.len() != 1 {
+                    debug!("break_tokens: broke {:?} to {:?}", tree, token_trees);
+                }
+            } else {
+                token_trees = SmallVec::new();
+                token_trees.push(tree);
+            }
+            token_trees.into_iter()
+        }
+
+        let mut t1 = self.trees().filter(semantic_tree).flat_map(break_tokens);
+        let mut t2 = other.trees().filter(semantic_tree).flat_map(break_tokens);
         for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
             if !t1.probably_equal_for_proc_macro(&t2) {
                 return false;

diff --git a/src/librustc_codegen_ssa/back/write.rs b/src/librustc_codegen_ssa/back/write.rs
@@ -142,8 +142,22 @@ impl ModuleConfig {
         let emit_obj = if !should_emit_obj {
             EmitObj::None
         } else if sess.target.target.options.obj_is_bitcode
-            || sess.opts.cg.linker_plugin_lto.enabled()
+            || (sess.opts.cg.linker_plugin_lto.enabled() && !no_builtins)
         {
+            // This case is selected if the target uses objects as bitcode, or
+            // if linker plugin LTO is enabled. In the linker plugin LTO case
+            // the assumption is that the final link-step will read the bitcode
+            // and convert it to object code. This may be done by either the
+            // native linker or rustc itself.
+            //
+            // Note, however, that the linker-plugin-lto requested here is
+            // explicitly ignored for `#![no_builtins]` crates. These crates are
+            // specifically ignored by rustc's LTO passes and wouldn't work if
+            // loaded into the linker. These crates define symbols that LLVM
+            // lowers intrinsics to, and these symbol dependencies aren't known
+            // until after codegen. As a result any crate marked
+            // `#![no_builtins]` is assumed to not participate in LTO and
+            // instead goes on to generate object code.
             EmitObj::Bitcode
         } else if need_bitcode_in_object(sess) {
             EmitObj::ObjectCode(BitcodeSection::Full)

diff --git a/src/librustc_data_structures/flock.rs b/src/librustc_data_structures/flock.rs
@@ -7,18 +7,22 @@
 #![allow(non_camel_case_types)]
 #![allow(nonstandard_style)]
 
+use std::fs::{File, OpenOptions};
 use std::io;
 use std::path::Path;
 
 cfg_if! {
-    if #[cfg(unix)] {
-        use std::ffi::{CString, OsStr};
-        use std::mem;
+    // We use `flock` rather than `fcntl` on Linux, because WSL1 does not support
+    // `fcntl`-style advisory locks properly (rust-lang/rust#72157).
+    //
+    // For other Unix targets we still use `fcntl` because it's more portable than
+    // `flock`.
+    if #[cfg(target_os = "linux")] {
         use std::os::unix::prelude::*;
 
         #[derive(Debug)]
         pub struct Lock {
-            fd: libc::c_int,
+            _file: File,
         }
 
         impl Lock {
@@ -27,22 +31,55 @@ cfg_if! {
                        create: bool,
                        exclusive: bool)
                        -> io::Result<Lock> {
-                let os: &OsStr = p.as_ref();
-                let buf = CString::new(os.as_bytes()).unwrap();
-                let open_flags = if create {
-                    libc::O_RDWR | libc::O_CREAT
+                let file = OpenOptions::new()
+                    .read(true)
+                    .write(true)
+                    .create(create)
+                    .mode(libc::S_IRWXU as u32)
+                    .open(p)?;
+
+                let mut operation = if exclusive {
+                    libc::LOCK_EX
                 } else {
-                    libc::O_RDWR
-                };
-
-                let fd = unsafe {
-                    libc::open(buf.as_ptr(), open_flags,
-                               libc::S_IRWXU as libc::c_int)
+                    libc::LOCK_SH
                 };
+                if !wait {
+                    operation |= libc::LOCK_NB
+                }
 
-                if fd < 0 {
-                    return Err(io::Error::last_os_error());
+                let ret = unsafe { libc::flock(file.as_raw_fd(), operation) };
+                if ret == -1 {
+                    Err(io::Error::last_os_error())
+                } else {
+                    Ok(Lock { _file: file })
                 }
+            }
+        }
+
+        // Note that we don't need a Drop impl to execute `flock(fd, LOCK_UN)`. Lock acquired by
+        // `flock` is associated with the file descriptor and closing the file releases it
+        // automatically.
+    } else if #[cfg(unix)] {
+        use std::mem;
+        use std::os::unix::prelude::*;
+
+        #[derive(Debug)]
+        pub struct Lock {
+            file: File,
+        }
+
+        impl Lock {
+            pub fn new(p: &Path,
+                       wait: bool,
+                       create: bool,
+                       exclusive: bool)
+                       -> io::Result<Lock> {
+                let file = OpenOptions::new()
+                    .read(true)
+                    .write(true)
+                    .create(create)
+                    .mode(libc::S_IRWXU as u32)
+                    .open(p)?;
 
                 let lock_type = if exclusive {
                     libc::F_WRLCK
@@ -58,14 +95,12 @@ cfg_if! {
 
                 let cmd = if wait { libc::F_SETLKW } else { libc::F_SETLK };
                 let ret = unsafe {
-                    libc::fcntl(fd, cmd, &flock)
+                    libc::fcntl(file.as_raw_fd(), cmd, &flock)
                 };
                 if ret == -1 {
-                    let err = io::Error::last_os_error();
-                    unsafe { libc::close(fd); }
-                    Err(err)
+                    Err(io::Error::last_os_error())
                 } else {
-                    Ok(Lock { fd })
+                    Ok(Lock { file })
                 }
             }
         }
@@ -79,15 +114,13 @@ cfg_if! {
                 flock.l_len = 0;
 
                 unsafe {
-                    libc::fcntl(self.fd, libc::F_SETLK, &flock);
-                    libc::close(self.fd);
+                    libc::fcntl(self.file.as_raw_fd(), libc::F_SETLK, &flock);
                 }
             }
         }
     } else if #[cfg(windows)] {
         use std::mem;
         use std::os::windows::prelude::*;
-        use std::fs::{File, OpenOptions};
 
         use winapi::um::minwinbase::{OVERLAPPED, LOCKFILE_FAIL_IMMEDIATELY, LOCKFILE_EXCLUSIVE_LOCK};
         use winapi::um::fileapi::LockFileEx;

diff --git a/src/librustc_middle/mir/interpret/allocation.rs b/src/librustc_middle/mir/interpret/allocation.rs
@@ -11,6 +11,7 @@ use rustc_target::abi::{Align, HasDataLayout, Size};
 
 use super::{
     read_target_uint, write_target_uint, AllocId, InterpResult, Pointer, Scalar, ScalarMaybeUninit,
+    UninitBytesAccess,
 };
 
 #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, RustcEncodable, RustcDecodable)]
@@ -545,17 +546,23 @@ impl<'tcx, Tag: Copy, Extra> Allocation<Tag, Extra> {
 impl<'tcx, Tag: Copy, Extra> Allocation<Tag, Extra> {
     /// Checks whether the given range  is entirely defined.
     ///
-    /// Returns `Ok(())` if it's defined. Otherwise returns the index of the byte
-    /// at which the first undefined access begins.
-    fn is_defined(&self, ptr: Pointer<Tag>, size: Size) -> Result<(), Size> {
+    /// Returns `Ok(())` if it's defined. Otherwise returns the range of byte
+    /// indexes of the first contiguous undefined access.
+    fn is_defined(&self, ptr: Pointer<Tag>, size: Size) -> Result<(), Range<Size>> {
         self.init_mask.is_range_initialized(ptr.offset, ptr.offset + size) // `Size` addition
     }
 
-    /// Checks that a range of bytes is defined. If not, returns the `ReadUndefBytes`
-    /// error which will report the first byte which is undefined.
+    /// Checks that a range of bytes is defined. If not, returns the `InvalidUninitBytes`
+    /// error which will report the first range of bytes which is undefined.
     fn check_defined(&self, ptr: Pointer<Tag>, size: Size) -> InterpResult<'tcx> {
-        self.is_defined(ptr, size)
-            .or_else(|idx| throw_ub!(InvalidUninitBytes(Some(Pointer::new(ptr.alloc_id, idx)))))
+        self.is_defined(ptr, size).or_else(|idx_range| {
+            throw_ub!(InvalidUninitBytes(Some(Box::new(UninitBytesAccess {
+                access_ptr: ptr.erase_tag(),
+                access_size: size,
+                uninit_ptr: Pointer::new(ptr.alloc_id, idx_range.start),
+                uninit_size: idx_range.end - idx_range.start, // `Size` subtraction
+            }))))
+        })
     }
 
     pub fn mark_definedness(&mut self, ptr: Pointer<Tag>, size: Size, new_state: bool) {
@@ -758,19 +765,25 @@ impl InitMask {
 
     /// Checks whether the range `start..end` (end-exclusive) is entirely initialized.
     ///
-    /// Returns `Ok(())` if it's initialized. Otherwise returns the index of the byte
-    /// at which the first uninitialized access begins.
+    /// Returns `Ok(())` if it's initialized. Otherwise returns a range of byte
+    /// indexes for the first contiguous span of the uninitialized access.
     #[inline]
-    pub fn is_range_initialized(&self, start: Size, end: Size) -> Result<(), Size> {
+    pub fn is_range_initialized(&self, start: Size, end: Size) -> Result<(), Range<Size>> {
         if end > self.len {
-            return Err(self.len);
+            return Err(self.len..end);
         }
 
         // FIXME(oli-obk): optimize this for allocations larger than a block.
         let idx = (start.bytes()..end.bytes()).map(Size::from_bytes).find(|&i| !self.get(i));
 
         match idx {
-            Some(idx) => Err(idx),
+            Some(idx) => {
+                let undef_end = (idx.bytes()..end.bytes())
+                    .map(Size::from_bytes)
+                    .find(|&i| self.get(i))
+                    .unwrap_or(end);
+                Err(idx..undef_end)
+            }
             None => Ok(()),
         }
     }

diff --git a/src/librustc_middle/mir/interpret/error.rs b/src/librustc_middle/mir/interpret/error.rs
@@ -6,7 +6,7 @@ use crate::ty::query::TyCtxtAt;
 use crate::ty::{self, layout, tls, FnSig, Ty};
 
 use rustc_data_structures::sync::Lock;
-use rustc_errors::{struct_span_err, DiagnosticBuilder, ErrorReported};
+use rustc_errors::{pluralize, struct_span_err, DiagnosticBuilder, ErrorReported};
 use rustc_hir as hir;
 use rustc_hir::definitions::DefPathData;
 use rustc_macros::HashStable;
@@ -327,6 +327,19 @@ impl fmt::Display for CheckInAllocMsg {
     }
 }
 
+/// Details of an access to uninitialized bytes where it is not allowed.
+#[derive(Debug)]
+pub struct UninitBytesAccess {
+    /// Location of the original memory access.
+    pub access_ptr: Pointer,
+    /// Size of the original memory access.
+    pub access_size: Size,
+    /// Location of the first uninitialized byte that was accessed.
+    pub uninit_ptr: Pointer,
+    /// Number of consecutive uninitialized bytes that were accessed.
+    pub uninit_size: Size,
+}
+
 /// Error information for when the program caused Undefined Behavior.
 pub enum UndefinedBehaviorInfo<'tcx> {
     /// Free-form case. Only for errors that are never caught!
@@ -384,7 +397,7 @@ pub enum UndefinedBehaviorInfo<'tcx> {
     /// Using a string that is not valid UTF-8,
     InvalidStr(std::str::Utf8Error),
     /// Using uninitialized data where it is not allowed.
-    InvalidUninitBytes(Option<Pointer>),
+    InvalidUninitBytes(Option<Box<UninitBytesAccess>>),
     /// Working with a local that is not currently live.
     DeadLocal,
     /// Data size is not equal to target size.
@@ -455,10 +468,18 @@ impl fmt::Display for UndefinedBehaviorInfo<'_> {
                 write!(f, "using {} as function pointer but it does not point to a function", p)
             }
             InvalidStr(err) => write!(f, "this string is not valid UTF-8: {}", err),
-            InvalidUninitBytes(Some(p)) => write!(
+            InvalidUninitBytes(Some(access)) => write!(
                 f,
-                "reading uninitialized memory at {}, but this operation requires initialized memory",
-                p
+                "reading {} byte{} of memory starting at {}, \
+                 but {} byte{} {} uninitialized starting at {}, \
+                 and this operation requires initialized memory",
+                access.access_size.bytes(),
+                pluralize!(access.access_size.bytes()),
+                access.access_ptr,
+                access.uninit_size.bytes(),
+                pluralize!(access.uninit_size.bytes()),
+                if access.uninit_size.bytes() != 1 { "are" } else { "is" },
+                access.uninit_ptr,
             ),
             InvalidUninitBytes(None) => write!(
                 f,
@@ -556,6 +577,9 @@ impl dyn MachineStopType {
     }
 }
 
+#[cfg(target_arch = "x86_64")]
+static_assert_size!(InterpError<'_>, 40);
+
 pub enum InterpError<'tcx> {
     /// The program caused undefined behavior.
     UndefinedBehavior(UndefinedBehaviorInfo<'tcx>),
@@ -604,7 +628,10 @@ impl InterpError<'_> {
             InterpError::MachineStop(b) => mem::size_of_val::<dyn MachineStopType>(&**b) > 0,
             InterpError::Unsupported(UnsupportedOpInfo::Unsupported(_))
             | InterpError::UndefinedBehavior(UndefinedBehaviorInfo::ValidationFailure(_))
-            | InterpError::UndefinedBehavior(UndefinedBehaviorInfo::Ub(_)) => true,
+            | InterpError::UndefinedBehavior(UndefinedBehaviorInfo::Ub(_))
+            | InterpError::UndefinedBehavior(UndefinedBehaviorInfo::InvalidUninitBytes(Some(_))) => {
+                true
+            }
             _ => false,
         }
     }

diff --git a/src/librustc_middle/mir/interpret/mod.rs b/src/librustc_middle/mir/interpret/mod.rs
@@ -119,7 +119,7 @@ use crate::ty::{self, Instance, Ty, TyCtxt};
 pub use self::error::{
     struct_error, CheckInAllocMsg, ConstEvalErr, ConstEvalRawResult, ConstEvalResult, ErrorHandled,
     FrameInfo, InterpError, InterpErrorInfo, InterpResult, InvalidProgramInfo, MachineStopType,
-    ResourceExhaustionInfo, UndefinedBehaviorInfo, UnsupportedOpInfo,
+    ResourceExhaustionInfo, UndefinedBehaviorInfo, UninitBytesAccess, UnsupportedOpInfo,
 };
 
 pub use self::value::{get_slice_bytes, ConstValue, RawConst, Scalar, ScalarMaybeUninit};