rust-lang · bors · Dec 11, 2023 · Oct 24, 2023 · Oct 25, 2023 · Nov 18, 2023
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -10,7 +10,7 @@ use crate::value::Value;
 use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh, wants_wasm_eh};
 use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
 use rustc_codegen_ssa::errors::{ExpectedPointerMutability, InvalidMonomorphization};
-use rustc_codegen_ssa::mir::operand::OperandRef;
+use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
 use rustc_codegen_ssa::mir::place::PlaceRef;
 use rustc_codegen_ssa::traits::*;
 use rustc_hir as hir;
@@ -946,6 +946,13 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         tcx.normalize_erasing_late_bound_regions(ty::ParamEnv::reveal_all(), callee_ty.fn_sig(tcx));
     let arg_tys = sig.inputs();
 
+    // Vector arguments must be immediates (non-power-of-two `#[repr(packed)]` vectors are not)
+    for (ty, arg) in arg_tys.iter().zip(args) {
+        if ty.is_simd() && !matches!(arg.val, OperandValue::Immediate(_)) {
+            return_error!(InvalidMonomorphization::SimdArgument { span, name, ty: *ty });
+        }
+    }
+
     if name == sym::simd_select_bitmask {
         let (len, _) = require_simd!(arg_tys[1], SimdArgument);
 

diff --git a/compiler/rustc_ty_utils/src/layout.rs b/compiler/rustc_ty_utils/src/layout.rs
@@ -435,7 +435,21 @@ fn layout_of_uncached<'tcx>(
                 .size
                 .checked_mul(e_len, dl)
                 .ok_or_else(|| error(cx, LayoutError::SizeOverflow(ty)))?;
-            let align = dl.vector_align(size);
+
+            let (abi, align) = if def.repr().packed() && !e_len.is_power_of_two() {
+                // Non-power-of-two vectors have padding up to the next power-of-two.
+                // If we're a packed repr, remove the padding while keeping the alignment as close
+                // to a vector as possible.
+                (
+                    Abi::Aggregate { sized: true },
+                    AbiAndPrefAlign {
+                        abi: Align::max_for_offset(size),
+                        pref: dl.vector_align(size).pref,
+                    },
+                )
+            } else {
+                (Abi::Vector { element: e_abi, count: e_len }, dl.vector_align(size))
+            };
             let size = size.align_to(align.abi);
 
             // Compute the placement of the vector fields:
@@ -448,7 +462,7 @@ fn layout_of_uncached<'tcx>(
             tcx.mk_layout(LayoutS {
                 variants: Variants::Single { index: FIRST_VARIANT },
                 fields,
-                abi: Abi::Vector { element: e_abi, count: e_len },
+                abi,
                 largest_niche: e_ly.largest_niche,
                 size,
                 align,

diff --git a/tests/ui/simd/repr_packed.rs b/tests/ui/simd/repr_packed.rs
@@ -0,0 +1,59 @@
+// run-pass
+
+#![feature(repr_simd, platform_intrinsics)]
+#![allow(non_camel_case_types)]
+
+#[repr(simd, packed)]
+struct Simd<T, const N: usize>([T; N]); // packed SIMD vector; size is checked against `[T; N]`
+
+#[repr(simd)]
+struct FullSimd<T, const N: usize>([T; N]); // non-packed counterpart of `Simd`
+
+fn check_size_align<T, const N: usize>() { // `Simd<T, N>`: array-sized, size a multiple of align
+    use std::mem;
+    assert_eq!(mem::size_of::<Simd<T, N>>(), mem::size_of::<[T; N]>()); // same size as array
+    assert_eq!(mem::size_of::<Simd<T, N>>() % mem::align_of::<Simd<T, N>>(), 0);
+}
+
+fn check_ty<T>() { // runs the size/align checks for element type `T` at several lengths
+    check_size_align::<T, 1>();
+    check_size_align::<T, 2>();
+    check_size_align::<T, 3>(); // non-power-of-two length
+    check_size_align::<T, 4>();
+    check_size_align::<T, 8>();
+    check_size_align::<T, 9>(); // non-power-of-two length
+    check_size_align::<T, 15>(); // non-power-of-two length
+}
+
+extern "platform-intrinsic" {
+    fn simd_add<T>(a: T, b: T) -> T; // element-wise add, per the asserts in `main`
+}
+
+fn main() {
+    check_ty::<u8>();
+    check_ty::<i16>();
+    check_ty::<u32>();
+    check_ty::<i64>();
+    check_ty::<usize>();
+    check_ty::<f32>();
+    check_ty::<f64>();
+
+    unsafe {
+        // power-of-two lengths have no padding, so packed vectors work as usual
+        let x: Simd<f64, 4> =
+            simd_add(Simd::<f64, 4>([0., 1., 2., 3.]), Simd::<f64, 4>([2., 2., 2., 2.]));
+        assert_eq!(std::mem::transmute::<_, [f64; 4]>(x), [2., 3., 4., 5.]);
+
+        // non-power-of-two full vectors have padding; packed ones must be expanded to them first
+        fn load<T, const N: usize>(v: Simd<T, N>) -> FullSimd<T, N> { // packed -> full vector
+            unsafe { // copies one `Simd<T, N>` into `tmp`; any bytes past it stay uninit
+                let mut tmp = core::mem::MaybeUninit::<FullSimd<T, N>>::uninit();
+                std::ptr::copy_nonoverlapping(&v as *const _, tmp.as_mut_ptr().cast(), 1);
+                tmp.assume_init()
+            }
+        }
+        let x: FullSimd<f64, 3> =
+            simd_add(load(Simd::<f64, 3>([0., 1., 2.])), load(Simd::<f64, 3>([2., 2., 2.])));
+        assert_eq!(x.0, [2., 3., 4.]);
+    }
+}