Skip to content
This repository has been archived by the owner on Aug 31, 2023. It is now read-only.

Commit

Permalink
feat(rome_js_formatter): remove unnecessary escapes (#2804)
Browse files Browse the repository at this point in the history
  • Loading branch information
ematipico committed Jul 4, 2022
1 parent 0001b18 commit 957bb33
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 99 deletions.
12 changes: 2 additions & 10 deletions crates/rome_js_formatter/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -457,16 +457,8 @@ mod test {
// use this test check if your snippet prints as you wish, without using a snapshot
fn quick_test() {
let src = r#"
const MyComponentWithLongName2: React.VoidFunctionComponent<
MyComponentWithLongNameProps
> = ({
x,
}) => {
const a = useA();
return (
f
);
};
"hol\a"
'hol\c'
"#;
let syntax = SourceType::tsx();
let tree = parse(src, 0, syntax);
Expand Down
155 changes: 78 additions & 77 deletions crates/rome_js_formatter/src/utils/string_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ impl ToAsciiLowercaseCow for String {
}
}

const CHARACTERS_THAT_COULD_KEEP_THE_ESCAPE: [char; 4] = ['\\', '\'', '"', '\r'];

#[derive(Eq, PartialEq, Debug)]
pub(crate) enum StringLiteralParentKind {
/// Variant to track tokens that are inside an expression
Expand Down Expand Up @@ -194,6 +192,7 @@ impl FormatLiteralStringToken<'_> {
}

/// This signal is used to tell to the next character what it should do
#[derive(Eq, PartialEq)]
enum CharSignal {
/// There hasn't been any signal
None,
Expand Down Expand Up @@ -256,12 +255,13 @@ impl<'token> LiteralStringNormaliser<'token> {
self.token.token()
}

fn can_reduce_escapes(&self, string_information: &StringInformation) -> bool {
if matches!(self.token.parent_kind, StringLiteralParentKind::Directive) {
!string_information.raw_content_has_quotes
} else {
true
}
/// A string should be manipulated only if its raw content contains backslash or quotes
fn should_manipulate_string(&self, string_information: &StringInformation) -> bool {
let preferred_quote = string_information.preferred_quote;
let alternate_quote = preferred_quote.other();
let raw_content = self.raw_content();
raw_content.contains(['\\', preferred_quote.as_char(), alternate_quote.as_char()])
|| !matches!(self.token.parent_kind, StringLiteralParentKind::Directive)
}

fn normalise_directive(&mut self, string_information: &StringInformation) -> Cow<'token, str> {
Expand Down Expand Up @@ -389,88 +389,96 @@ impl<'token> LiteralStringNormaliser<'token> {
/// By default the formatter uses `\n` as a newline. The function replaces
/// `\r\n` with `\n`,
fn normalize_string(&self, string_information: &StringInformation) -> Cow<'token, str> {
let preferred_quote = string_information.preferred_quote;
let raw_content = self.raw_content();

if !self.should_manipulate_string(string_information) {
return Cow::Borrowed(raw_content);
}
let preferred_quote = string_information.preferred_quote;
let alternate_quote = preferred_quote.other();
let mut reduced_string = String::new();
let mut last_end = 0;
let mut signal = CharSignal::None;
let raw_content = self.raw_content();
let can_reduce_escapes = self.can_reduce_escapes(string_information);

for (start, part) in raw_content.match_indices(CHARACTERS_THAT_COULD_KEEP_THE_ESCAPE) {
if start - last_end >= 1 {
// This is the case where we don't have consecutive characters and if so, we have to reset the signal.
// An example is the following: " \\u2028 ' "
// After the two backslash, we have a character that is not a quote. So we reset the signal and we
// iterate over the single quote, we don't deal with any edge case.
signal = CharSignal::None;

let mut chars = raw_content.char_indices().peekable();

while let Some((_, current_char)) = chars.next() {
let next_character = chars.peek();

if let AlreadyPrinted(char) = signal {
if char == current_char {
continue;
}
}
reduced_string.push_str(&raw_content[last_end..start]);
last_end = start + part.len();

match part {
"\\" => {
if !can_reduce_escapes {
reduced_string.push_str(part);
continue;
}

match current_char {
'\\' => {
let bytes = raw_content.as_bytes();

match bytes[start] {
// TODO: #2444 add checks to additional characters to reduce the number of escapes
// "\a" VS "\n" => "a" VS "\n"
b'\\' => {
if start + 1 < bytes.len() {
// If we encounter an alternate quote that is escaped, we have to
// remove the escape from it.
// This is done because of how the enclosed strings can change.
// Check `computed_preferred_quote` for more details.
if bytes[start + 1] == alternate_quote.as_bytes()
if let Some((next_index, next_character)) = next_character {
// If we encounter an alternate quote that is escaped, we have to
// remove the escape from it.
// This is done because of how the enclosed strings can change.
// Check `computed_preferred_quote` for more details.
if *next_character as u8 == alternate_quote.as_bytes()
// This check is a safety net for cases where the backslash is at the end
// of the raw content:
// ("\\")
// The second backslash is at the end.
&& start + 2 <= bytes.len()
{
match signal {
CharSignal::Keep => {
reduced_string.push('\\');
}
_ => {
reduced_string.push(alternate_quote.as_char());
signal = AlreadyPrinted(alternate_quote.as_char());
}
}
} else {
// The next character is another backslash, let's signal
// the next iteration that it should keep it in the final string
if bytes[start + 1] == b'\\' {
signal = CharSignal::Keep;
}
// fallback, keep the backslash
reduced_string.push('\\');
&& *next_index < bytes.len()
{
match signal {
CharSignal::Keep => {
reduced_string.push(current_char);
}
_ => {
reduced_string.push(alternate_quote.as_char());
signal = AlreadyPrinted(alternate_quote.as_char());
}
} else {
// fallback, keep the backslash
reduced_string.push('\\');
}
} else if signal == CharSignal::Keep {
reduced_string.push(current_char);
}
// The next character is another backslash, or
// a character that should be kept in the next iteration
else if matches!(
next_character,
'\\' | 'v' | 'b' | 'f' | 'n' | 't' | 'r' | 'u' | 'x'
) {
signal = CharSignal::Keep;
// fallback, keep the backslash
reduced_string.push(current_char);
}
// these are character that should stay, but
// the next iteration should decide if to keep them or not
else if !next_character.is_alphabetic()
&& *next_character != alternate_quote.as_char()
&& *next_character != preferred_quote.as_char()
{
reduced_string.push(current_char);
} else {
// these, usually characters that can have their
// escape removed: "\a" => "a"
// So we ignore the current slash and we continue
// to the next iteration
continue;
}
_ => unreachable!("We checked already the presence of a backslash"),
} else {
// fallback, keep the backslash
reduced_string.push(current_char);
}
}
"\n" => {
'\n' | '\t' => {
if let AlreadyPrinted(the_char) = signal {
if the_char == '\n' {
if matches!(the_char, '\n' | '\t') {
signal = CharSignal::None
}
} else {
reduced_string.push('\n');
reduced_string.push(current_char);
}
}
// If the current character is \r and the
// next is \n, skip over the entire sequence
"\r" if raw_content[last_end..].starts_with('\n') => {
'\r' if next_character.map_or(false, |(_, c)| *c == '\n') => {
reduced_string.push('\n');
signal = AlreadyPrinted('\n');
}
Expand All @@ -479,22 +487,14 @@ impl<'token> LiteralStringNormaliser<'token> {
// an escaped version.
// This is done because of how the enclosed strings can change.
// Check `computed_preferred_quote` for more details.
if part == preferred_quote.as_string() {
if !can_reduce_escapes {
reduced_string.push_str(part);
continue;
}
if current_char == preferred_quote.as_char() {
let last_char = &reduced_string.chars().last();
if let Some('\\') = last_char {
reduced_string.push(preferred_quote.as_char());
} else {
reduced_string.push_str(preferred_quote.as_escaped());
}
} else if part == alternate_quote.as_string() {
if !can_reduce_escapes {
reduced_string.push_str(part);
continue;
}
} else if current_char == alternate_quote.as_char() {
match signal {
CharSignal::None => {
reduced_string.push(alternate_quote.as_char());
Expand All @@ -510,6 +510,8 @@ impl<'token> LiteralStringNormaliser<'token> {
}
}
}
} else {
reduced_string.push(current_char);
}
}
}
Expand All @@ -519,7 +521,6 @@ impl<'token> LiteralStringNormaliser<'token> {
if reduced_string.is_empty() {
Cow::Borrowed(raw_content)
} else {
reduced_string.push_str(&raw_content[last_end..raw_content.len()]);
// don't allocate a new string if the new string is still equals to the input string
if reduced_string == raw_content {
Cow::Borrowed(raw_content)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
source: crates/rome_js_formatter/tests/prettier_tests.rs
assertion_line: 182
expression: escaped.js
---
# Input
Expand Down Expand Up @@ -52,19 +53,19 @@ expression: escaped.js
"\'";
"\"";
"\\";
"\a";
"hol\a";
"a";
"hol\a";
"hola";
"hol\\a (the a is not escaped)";
"hol\\a (the a is not escaped)";
"multiple \a unnecessary \a escapes";
"multiple \a unnecessary \a escapes";
"multiple a unnecessary a escapes";
"unnecessarily escaped character preceded by escaped backslash \\\a";
"unnecessarily escaped character preceded by escaped backslash \\\a";
"unescaped character preceded by two escaped backslashes \\\\a";
"unescaped character preceded by two escaped backslashes \\\\a";
"\a\a"; // consecutive unnecessarily escaped characters
"\a\a"; // consecutive unnecessarily escaped characters
"aa"; // consecutive unnecessarily escaped characters
"escaped \u2030 \‰ (should still stay escaped)";
// Meaningful escapes
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
source: crates/rome_js_formatter/tests/prettier_tests.rs
assertion_line: 182
expression: strings.js
---
# Input
Expand Down Expand Up @@ -146,20 +147,20 @@ expression: strings.js
// Unnecessary escapes.
("'");
('"');
("\a");
("\a");
("hol\a");
("hol\a");
("a");
("a");
("hola");
("hola");
("hol\\a (the a is not escaped)");
("hol\\a (the a is not escaped)");
("multiple \a unnecessary \a escapes");
("multiple \a unnecessary \a escapes");
("multiple a unnecessary a escapes");
("multiple a unnecessary a escapes");
("unnecessarily escaped character preceded by escaped backslash \\\a");
("unnecessarily escaped character preceded by escaped backslash \\\a");
("unescaped character preceded by two escaped backslashes \\\\a");
("unescaped character preceded by two escaped backslashes \\\\a");
("\a\a"); // consecutive unnecessarily escaped characters
("\a\a"); // consecutive unnecessarily escaped characters
("aa"); // consecutive unnecessarily escaped characters
("aa"); // consecutive unnecessarily escaped characters
("escaped \u2030 \‰ (should not stay escaped)");
// Meaningful escapes
Expand Down

0 comments on commit 957bb33

Please sign in to comment.