feat(rome_js_formatter): remove unnecessary escapes (#2804)

rome · Jul 4, 2022 · 957bb33 · 957bb33
1 parent 0001b18
commit 957bb33
Show file tree

Hide file tree

Showing 4 changed files with 94 additions and 99 deletions.
diff --git a/crates/rome_js_formatter/src/lib.rs b/crates/rome_js_formatter/src/lib.rs
@@ -457,16 +457,8 @@ mod test {
     // use this test check if your snippet prints as you wish, without using a snapshot
     fn quick_test() {
         let src = r#"
-const MyComponentWithLongName2: React.VoidFunctionComponent<
-  MyComponentWithLongNameProps
-> = ({
-  x,
-}) => {
-  const a = useA();
-  return (
-   f
-  );
-};
+"hol\a"
+'hol\c'
         "#;
         let syntax = SourceType::tsx();
         let tree = parse(src, 0, syntax);

diff --git a/crates/rome_js_formatter/src/utils/string_utils.rs b/crates/rome_js_formatter/src/utils/string_utils.rs
@@ -41,8 +41,6 @@ impl ToAsciiLowercaseCow for String {
     }
 }
 
-const CHARACTERS_THAT_COULD_KEEP_THE_ESCAPE: [char; 4] = ['\\', '\'', '"', '\r'];
-
 #[derive(Eq, PartialEq, Debug)]
 pub(crate) enum StringLiteralParentKind {
     /// Variant to track tokens that are inside an expression
@@ -194,6 +192,7 @@ impl FormatLiteralStringToken<'_> {
 }
 
 /// This signal is used to tell to the next character what it should do
+#[derive(Eq, PartialEq)]
 enum CharSignal {
     /// There hasn't been any signal
     None,
@@ -256,12 +255,13 @@ impl<'token> LiteralStringNormaliser<'token> {
         self.token.token()
     }
 
-    fn can_reduce_escapes(&self, string_information: &StringInformation) -> bool {
-        if matches!(self.token.parent_kind, StringLiteralParentKind::Directive) {
-            !string_information.raw_content_has_quotes
-        } else {
-            true
-        }
+    /// A string should be manipulated only if its raw content contains backslash or quotes
+    fn should_manipulate_string(&self, string_information: &StringInformation) -> bool {
+        let preferred_quote = string_information.preferred_quote;
+        let alternate_quote = preferred_quote.other();
+        let raw_content = self.raw_content();
+        raw_content.contains(['\\', preferred_quote.as_char(), alternate_quote.as_char()])
+            || !matches!(self.token.parent_kind, StringLiteralParentKind::Directive)
     }
 
     fn normalise_directive(&mut self, string_information: &StringInformation) -> Cow<'token, str> {
@@ -389,88 +389,96 @@ impl<'token> LiteralStringNormaliser<'token> {
     /// By default the formatter uses `\n` as a newline. The function replaces
     /// `\r\n` with `\n`,
     fn normalize_string(&self, string_information: &StringInformation) -> Cow<'token, str> {
-        let preferred_quote = string_information.preferred_quote;
+        let raw_content = self.raw_content();
 
+        if !self.should_manipulate_string(string_information) {
+            return Cow::Borrowed(raw_content);
+        }
+        let preferred_quote = string_information.preferred_quote;
         let alternate_quote = preferred_quote.other();
         let mut reduced_string = String::new();
-        let mut last_end = 0;
         let mut signal = CharSignal::None;
-        let raw_content = self.raw_content();
-        let can_reduce_escapes = self.can_reduce_escapes(string_information);
-
-        for (start, part) in raw_content.match_indices(CHARACTERS_THAT_COULD_KEEP_THE_ESCAPE) {
-            if start - last_end >= 1 {
-                // This is the case where we don't have consecutive characters and if so, we have to reset the signal.
-                // An example is the following: " \\u2028 ' "
-                // After the two backslash, we have a character that is not a quote. So we reset the signal and we
-                // iterate over the single quote, we don't deal with any edge case.
-                signal = CharSignal::None;
+
+        let mut chars = raw_content.char_indices().peekable();
+
+        while let Some((_, current_char)) = chars.next() {
+            let next_character = chars.peek();
+
+            if let AlreadyPrinted(char) = signal {
+                if char == current_char {
+                    continue;
+                }
             }
-            reduced_string.push_str(&raw_content[last_end..start]);
-            last_end = start + part.len();
-
-            match part {
-                "\\" => {
-                    if !can_reduce_escapes {
-                        reduced_string.push_str(part);
-                        continue;
-                    }
+
+            match current_char {
+                '\\' => {
                     let bytes = raw_content.as_bytes();
 
-                    match bytes[start] {
-                        // TODO: #2444 add checks to additional characters to reduce the number of escapes
-                        // "\a" VS "\n" => "a" VS "\n"
-                        b'\\' => {
-                            if start + 1 < bytes.len() {
-                                // If we encounter an alternate quote that is escaped, we have to
-                                // remove the escape from it.
-                                // This is done because of how the enclosed strings can change.
-                                // Check `computed_preferred_quote` for more details.
-                                if bytes[start + 1] == alternate_quote.as_bytes()
+                    if let Some((next_index, next_character)) = next_character {
+                        // If we encounter an alternate quote that is escaped, we have to
+                        // remove the escape from it.
+                        // This is done because of how the enclosed strings can change.
+                        // Check `computed_preferred_quote` for more details.
+                        if *next_character as u8 == alternate_quote.as_bytes()
                                     // This check is a safety net for cases where the backslash is at the end
                                     // of the raw content:
                                     // ("\\")
                                     // The second backslash is at the end.
-                                    && start + 2 <= bytes.len()
-                                {
-                                    match signal {
-                                        CharSignal::Keep => {
-                                            reduced_string.push('\\');
-                                        }
-                                        _ => {
-                                            reduced_string.push(alternate_quote.as_char());
-                                            signal = AlreadyPrinted(alternate_quote.as_char());
-                                        }
-                                    }
-                                } else {
-                                    // The next character is another backslash, let's signal
-                                    // the next iteration that it should keep it in the final string
-                                    if bytes[start + 1] == b'\\' {
-                                        signal = CharSignal::Keep;
-                                    }
-                                    // fallback, keep the backslash
-                                    reduced_string.push('\\');
+                                    && *next_index < bytes.len()
+                        {
+                            match signal {
+                                CharSignal::Keep => {
+                                    reduced_string.push(current_char);
+                                }
+                                _ => {
+                                    reduced_string.push(alternate_quote.as_char());
+                                    signal = AlreadyPrinted(alternate_quote.as_char());
                                 }
-                            } else {
-                                // fallback, keep the backslash
-                                reduced_string.push('\\');
                             }
+                        } else if signal == CharSignal::Keep {
+                            reduced_string.push(current_char);
+                        }
+                        // The next character is another backslash, or
+                        // a character that should be kept in the next iteration
+                        else if matches!(
+                            next_character,
+                            '\\' | 'v' | 'b' | 'f' | 'n' | 't' | 'r' | 'u' | 'x'
+                        ) {
+                            signal = CharSignal::Keep;
+                            // fallback, keep the backslash
+                            reduced_string.push(current_char);
+                        }
+                        // these are character that should stay, but
+                        // the next iteration should decide if to keep them or not
+                        else if !next_character.is_alphabetic()
+                            && *next_character != alternate_quote.as_char()
+                            && *next_character != preferred_quote.as_char()
+                        {
+                            reduced_string.push(current_char);
+                        } else {
+                            // these, usually characters that can have their
+                            // escape removed: "\a" => "a"
+                            // So we ignore the current slash and we continue
+                            // to the next iteration
+                            continue;
                         }
-                        _ => unreachable!("We checked already the presence of a backslash"),
+                    } else {
+                        // fallback, keep the backslash
+                        reduced_string.push(current_char);
                     }
                 }
-                "\n" => {
+                '\n' | '\t' => {
                     if let AlreadyPrinted(the_char) = signal {
-                        if the_char == '\n' {
+                        if matches!(the_char, '\n' | '\t') {
                             signal = CharSignal::None
                         }
                     } else {
-                        reduced_string.push('\n');
+                        reduced_string.push(current_char);
                     }
                 }
                 // If the current character is \r and the
                 // next is \n, skip over the entire sequence
-                "\r" if raw_content[last_end..].starts_with('\n') => {
+                '\r' if next_character.map_or(false, |(_, c)| *c == '\n') => {
                     reduced_string.push('\n');
                     signal = AlreadyPrinted('\n');
                 }
@@ -479,22 +487,14 @@ impl<'token> LiteralStringNormaliser<'token> {
                     // an escaped version.
                     // This is done because of how the enclosed strings can change.
                     // Check `computed_preferred_quote` for more details.
-                    if part == preferred_quote.as_string() {
-                        if !can_reduce_escapes {
-                            reduced_string.push_str(part);
-                            continue;
-                        }
+                    if current_char == preferred_quote.as_char() {
                         let last_char = &reduced_string.chars().last();
                         if let Some('\\') = last_char {
                             reduced_string.push(preferred_quote.as_char());
                         } else {
                             reduced_string.push_str(preferred_quote.as_escaped());
                         }
-                    } else if part == alternate_quote.as_string() {
-                        if !can_reduce_escapes {
-                            reduced_string.push_str(part);
-                            continue;
-                        }
+                    } else if current_char == alternate_quote.as_char() {
                         match signal {
                             CharSignal::None => {
                                 reduced_string.push(alternate_quote.as_char());
@@ -510,6 +510,8 @@ impl<'token> LiteralStringNormaliser<'token> {
                                 }
                             }
                         }
+                    } else {
+                        reduced_string.push(current_char);
                     }
                 }
             }
@@ -519,7 +521,6 @@ impl<'token> LiteralStringNormaliser<'token> {
         if reduced_string.is_empty() {
             Cow::Borrowed(raw_content)
         } else {
-            reduced_string.push_str(&raw_content[last_end..raw_content.len()]);
             // don't allocate a new string if the new string is still equals to the input string
             if reduced_string == raw_content {
                 Cow::Borrowed(raw_content)

diff --git a/crates/rome_js_formatter/tests/specs/prettier/js/directives/escaped.js.snap b/crates/rome_js_formatter/tests/specs/prettier/js/directives/escaped.js.snap
@@ -1,5 +1,6 @@
 ---
 source: crates/rome_js_formatter/tests/prettier_tests.rs
+assertion_line: 182
 expression: escaped.js
 ---
 # Input
@@ -52,19 +53,19 @@ expression: escaped.js
 "\'";
 "\"";
 "\\";
-"\a";
-"hol\a";
+"a";
 "hol\a";
+"hola";
 "hol\\a (the a is not escaped)";
 "hol\\a (the a is not escaped)";
 "multiple \a unnecessary \a escapes";
-"multiple \a unnecessary \a escapes";
+"multiple a unnecessary a escapes";
 "unnecessarily escaped character preceded by escaped backslash \\\a";
 "unnecessarily escaped character preceded by escaped backslash \\\a";
 "unescaped character preceded by two escaped backslashes       \\\\a";
 "unescaped character preceded by two escaped backslashes       \\\\a";
 "\a\a"; // consecutive unnecessarily escaped characters
-"\a\a"; // consecutive unnecessarily escaped characters
+"aa"; // consecutive unnecessarily escaped characters
 "escaped \u2030 \‰ (should still stay escaped)";
 
 // Meaningful escapes

diff --git a/crates/rome_js_formatter/tests/specs/prettier/js/quotes/strings.js.snap b/crates/rome_js_formatter/tests/specs/prettier/js/quotes/strings.js.snap
@@ -1,5 +1,6 @@
 ---
 source: crates/rome_js_formatter/tests/prettier_tests.rs
+assertion_line: 182
 expression: strings.js
 ---
 # Input
@@ -146,20 +147,20 @@ expression: strings.js
 // Unnecessary escapes.
 ("'");
 ('"');
-("\a");
-("\a");
-("hol\a");
-("hol\a");
+("a");
+("a");
+("hola");
+("hola");
 ("hol\\a (the a is not escaped)");
 ("hol\\a (the a is not escaped)");
-("multiple \a unnecessary \a escapes");
-("multiple \a unnecessary \a escapes");
+("multiple a unnecessary a escapes");
+("multiple a unnecessary a escapes");
 ("unnecessarily escaped character preceded by escaped backslash \\\a");
 ("unnecessarily escaped character preceded by escaped backslash \\\a");
 ("unescaped character preceded by two escaped backslashes       \\\\a");
 ("unescaped character preceded by two escaped backslashes       \\\\a");
-("\a\a"); // consecutive unnecessarily escaped characters
-("\a\a"); // consecutive unnecessarily escaped characters
+("aa"); // consecutive unnecessarily escaped characters
+("aa"); // consecutive unnecessarily escaped characters
 ("escaped \u2030 \‰ (should not stay escaped)");
 
 // Meaningful escapes