Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a fast-path to Debug ASCII &str #121150

Merged
merged 5 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions library/core/benches/str/debug.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ fn ascii_escapes(b: &mut Bencher) {
assert_fmt(
s,
r#""some\tmore\tascii\ttext\nthis time with some \"escapes\", also 64 byte""#,
21,
15,
);
b.iter(|| {
black_box(format!("{:?}", black_box(s)));
Expand Down Expand Up @@ -72,7 +72,7 @@ fn mostly_unicode(b: &mut Bencher) {
#[bench]
fn mixed(b: &mut Bencher) {
let s = "\"❤️\"\n\"hűha ez betű\"\n\"кириллических букв\".";
assert_fmt(s, r#""\"❤\u{fe0f}\"\n\"hűha ez betű\"\n\"кириллических букв\".""#, 36);
assert_fmt(s, r#""\"❤\u{fe0f}\"\n\"hűha ez betű\"\n\"кириллических букв\".""#, 21);
b.iter(|| {
black_box(format!("{:?}", black_box(s)));
});
Expand Down
59 changes: 41 additions & 18 deletions library/core/src/fmt/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2399,23 +2399,47 @@ impl Display for bool {
impl Debug for str {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
f.write_char('"')?;
let mut from = 0;
for (i, c) in self.char_indices() {
let esc = c.escape_debug_ext(EscapeDebugExtArgs {
escape_grapheme_extended: true,
escape_single_quote: false,
escape_double_quote: true,
});
// If char needs escaping, flush backlog so far and write, else skip
if esc.len() != 1 {
f.write_str(&self[from..i])?;
for c in esc {
f.write_char(c)?;

// substring we know is printable
let mut printable_range = 0..0;

fn needs_escape(b: u8) -> bool {
b > 0x7E || b < 0x20 || b == b'\\' || b == b'"'
}

// the loop here first skips over runs of printable ASCII as a fast path.
// other chars (unicode, or ASCII that needs escaping) are then handled per-`char`.
let mut rest = self;
while rest.len() > 0 {
let Some(non_printable_start) = rest.as_bytes().iter().position(|&b| needs_escape(b))
else {
printable_range.end += rest.len();
break;
};

printable_range.end += non_printable_start;
// SAFETY: the position was derived from an iterator, so is known to be within bounds, and at a char boundary
rest = unsafe { rest.get_unchecked(non_printable_start..) };

let mut chars = rest.chars();
if let Some(c) = chars.next() {
let esc = c.escape_debug_ext(EscapeDebugExtArgs {
escape_grapheme_extended: true,
escape_single_quote: false,
escape_double_quote: true,
});
if esc.len() != 1 {
f.write_str(&self[printable_range.clone()])?;
Display::fmt(&esc, f)?;
printable_range.start = printable_range.end + c.len_utf8();
}
from = i + c.len_utf8();
printable_range.end += c.len_utf8();
}
rest = chars.as_str();
}
f.write_str(&self[from..])?;

f.write_str(&self[printable_range])?;

f.write_char('"')
}
}
Expand All @@ -2431,13 +2455,12 @@ impl Display for str {
impl Debug for char {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
f.write_char('\'')?;
for c in self.escape_debug_ext(EscapeDebugExtArgs {
let esc = self.escape_debug_ext(EscapeDebugExtArgs {
escape_grapheme_extended: true,
escape_single_quote: true,
escape_double_quote: false,
}) {
f.write_char(c)?
}
});
Display::fmt(&esc, f)?;
f.write_char('\'')
}
}
Expand Down
Loading