Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor memchr to allow optimization #76971

Merged
merged 2 commits into from
Oct 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions library/core/src/slice/cmp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,12 +268,14 @@ where
}

impl SliceContains for u8 {
#[inline]
fn slice_contains(&self, x: &[Self]) -> bool {
memchr::memchr(*self, x).is_some()
}
}

impl SliceContains for i8 {
#[inline]
fn slice_contains(&self, x: &[Self]) -> bool {
let byte = *self as u8;
// SAFETY: `i8` and `u8` have the same memory layout, thus casting `x.as_ptr()`
Expand Down
40 changes: 24 additions & 16 deletions library/core/src/slice/memchr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ const HI_U64: u64 = 0x8080808080808080;
// Use truncation.
const LO_USIZE: usize = LO_U64 as usize;
const HI_USIZE: usize = HI_U64 as usize;
const USIZE_BYTES: usize = mem::size_of::<usize>();

/// Returns `true` if `x` contains any zero byte.
///
Expand All @@ -38,19 +39,29 @@ fn repeat_byte(b: u8) -> usize {
}

/// Returns the first index matching the byte `x` in `text`.
#[inline]
pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
// Fast path for small slices
if text.len() < 2 * USIZE_BYTES {
return text.iter().position(|elt| *elt == x);
}

memchr_general_case(x, text)
}

fn memchr_general_case(x: u8, text: &[u8]) -> Option<usize> {
// Scan for a single byte value by reading two `usize` words at a time.
//
// Split `text` in three parts
// - unaligned initial part, before the first word aligned address in text
// - body, scan by 2 words at a time
// - the last remaining part, < 2 word size

// search up to an aligned boundary
let len = text.len();
let ptr = text.as_ptr();
let usize_bytes = mem::size_of::<usize>();
let mut offset = ptr.align_offset(USIZE_BYTES);

// search up to an aligned boundary
let mut offset = ptr.align_offset(usize_bytes);
if offset > 0 {
offset = cmp::min(offset, len);
if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
Expand All @@ -60,22 +71,19 @@ pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {

// search the body of the text
let repeated_x = repeat_byte(x);
while offset <= len - 2 * USIZE_BYTES {
unsafe {
let u = *(ptr.add(offset) as *const usize);
let v = *(ptr.add(offset + USIZE_BYTES) as *const usize);

if len >= 2 * usize_bytes {
while offset <= len - 2 * usize_bytes {
unsafe {
let u = *(ptr.add(offset) as *const usize);
let v = *(ptr.add(offset + usize_bytes) as *const usize);

// break if there is a matching byte
let zu = contains_zero_byte(u ^ repeated_x);
let zv = contains_zero_byte(v ^ repeated_x);
if zu || zv {
break;
}
// break if there is a matching byte
let zu = contains_zero_byte(u ^ repeated_x);
let zv = contains_zero_byte(v ^ repeated_x);
if zu || zv {
break;
}
offset += usize_bytes * 2;
}
offset += USIZE_BYTES * 2;
}

// Find the byte after the point the body loop stopped.
Expand Down
1 change: 1 addition & 0 deletions library/core/src/slice/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1633,6 +1633,7 @@ impl<T> [T] {
/// assert!(!v.iter().any(|e| e == "hi"));
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
pub fn contains(&self, x: &T) -> bool
where
T: PartialEq,
Expand Down
63 changes: 63 additions & 0 deletions src/test/codegen/issue-75659.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// This test checks that the call to memchr/slice_contains is optimized away
// when searching in small slices.

// compile-flags: -O
// only-x86_64

#![crate_type = "lib"]

// CHECK-LABEL: @foo1
#[no_mangle]
pub fn foo1(x: u8, data: &[u8; 1]) -> bool {
// CHECK-NOT: memchr
// CHECK-NOT: slice_contains
data.contains(&x)
}

// CHECK-LABEL: @foo2
#[no_mangle]
pub fn foo2(x: u8, data: &[u8; 2]) -> bool {
// CHECK-NOT: memchr
// CHECK-NOT: slice_contains
data.contains(&x)
}

// CHECK-LABEL: @foo3
#[no_mangle]
pub fn foo3(x: u8, data: &[u8; 3]) -> bool {
// CHECK-NOT: memchr
// CHECK-NOT: slice_contains
data.contains(&x)
}

// CHECK-LABEL: @foo4
#[no_mangle]
pub fn foo4(x: u8, data: &[u8; 4]) -> bool {
// CHECK-NOT: memchr
// CHECK-NOT: slice_contains
data.contains(&x)
}

// CHECK-LABEL: @foo8
#[no_mangle]
pub fn foo8(x: u8, data: &[u8; 8]) -> bool {
// CHECK-NOT: memchr
// CHECK-NOT: slice_contains
data.contains(&x)
}

// CHECK-LABEL: @foo8_i8
#[no_mangle]
pub fn foo8_i8(x: i8, data: &[i8; 8]) -> bool {
// CHECK-NOT: memchr
// CHECK-NOT: slice_contains
!data.contains(&x)
}

// Check that the general case isn't inlined
// CHECK-LABEL: @foo80
#[no_mangle]
pub fn foo80(x: u8, data: &[u8; 80]) -> bool {
// CHECK: call core::slice::memchr
data.contains(&x)
}