From bd8f553f89da3c1b99f7efeed6eded7d5b3faa07 Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Wed, 17 Apr 2024 14:57:15 +0200 Subject: [PATCH 1/6] add failing tests to ensure we keep `_` in dashed idents --- .../src/utils/decode-arbitrary-value.test.ts | 35 ++++++++++++++++--- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/packages/tailwindcss/src/utils/decode-arbitrary-value.test.ts b/packages/tailwindcss/src/utils/decode-arbitrary-value.test.ts index e78a25f7418d..1890465809ee 100644 --- a/packages/tailwindcss/src/utils/decode-arbitrary-value.test.ts +++ b/packages/tailwindcss/src/utils/decode-arbitrary-value.test.ts @@ -14,13 +14,40 @@ describe('decoding arbitrary values', () => { expect(decodeArbitraryValue('foo\\_bar')).toBe('foo_bar') }) - it('should not replace underscores in url()', () => { + it('should not replace underscores in url(…)', () => { expect(decodeArbitraryValue('url(./my_file.jpg)')).toBe('url(./my_file.jpg)') }) - it('should leave var(…) as is', () => { - expect(decodeArbitraryValue('var(--foo)')).toBe('var(--foo)') - expect(decodeArbitraryValue('var(--headings-h1-size)')).toBe('var(--headings-h1-size)') + it('should not replace underscores in var(…)', () => { + expect(decodeArbitraryValue('var(--foo_bar)')).toBe('var(--foo_bar)') + }) + + it('should replace underscores in the fallback value of var(…)', () => { + expect(decodeArbitraryValue('var(--foo_bar, "my_content")')).toBe( + 'var(--foo_bar, "my content")', + ) + }) + + it('should not replace underscores in nested var(…)', () => { + expect(decodeArbitraryValue('var(--foo_bar, var(--bar_baz))')).toBe( + 'var(--foo_bar, var(--bar_baz))', + ) + }) + + it('should replace underscores in the fallback value of nested var(…)', () => { + expect(decodeArbitraryValue('var(--foo_bar, var(--bar_baz, "my_content"))')).toBe( + 'var(--foo_bar, var(--bar_baz, "my content"))', + ) + }) + + it('should not replace underscores in dashed idents', () => { + expect(decodeArbitraryValue('--foo_bar')).toBe('--foo_bar') + }) + + it('should replace underscores in strings that look like dashed idents', () => { + expect(decodeArbitraryValue('content-["some--thing_here"]')).toBe( + 'content-["some--thing here"]', + ) }) }) From 0ce08f8ed77b0c1d0a508e23d91a115a734f2f56 Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Wed, 17 Apr 2024 14:58:20 +0200 Subject: [PATCH 2/6] =?UTF-8?q?use=20`charCodeAt(=E2=80=A6)`=20to=20be=20c?= =?UTF-8?q?onsistent=20with=20other=20parts=20of=20the=20codebase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tailwindcss/src/utils/decode-arbitrary-value.ts | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/packages/tailwindcss/src/utils/decode-arbitrary-value.ts b/packages/tailwindcss/src/utils/decode-arbitrary-value.ts index f644e663995a..199e68455914 100644 --- a/packages/tailwindcss/src/utils/decode-arbitrary-value.ts +++ b/packages/tailwindcss/src/utils/decode-arbitrary-value.ts @@ -13,6 +13,9 @@ export function decodeArbitraryValue(input: string): string { return input } +const BACKSLASH = 0x5c +const UNDERSCORE = 0x5f + /** * Convert `_` to ` `, except for escaped underscores `\_` they should be * converted to `_` instead. @@ -20,22 +23,22 @@ export function decodeArbitraryValue(input: string): string { function convertUnderscoresToWhitespace(input: string) { let output = '' for (let i = 0; i < input.length; i++) { - let char = input[i] + let char = input.charCodeAt(i) // Escaped underscore - if (char === '\\' && input[i + 1] === '_') { + if (char === BACKSLASH && input.charCodeAt(i + 1) === UNDERSCORE) { output += '_' i += 1 } // Unescaped underscore - else if (char === '_') { + else if (char === UNDERSCORE) { output += ' ' } // All other characters else { - output += char + output += input[i] } } From c4273a4f242c14f08c1cd581c410fd0b2d9b04a3 Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Wed, 17 Apr 2024 15:01:37 +0200 Subject: [PATCH 3/6] use `idx`, similar to the `segment` function --- .../tailwindcss/src/utils/decode-arbitrary-value.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/tailwindcss/src/utils/decode-arbitrary-value.ts b/packages/tailwindcss/src/utils/decode-arbitrary-value.ts index 199e68455914..ed75fb73b084 100644 --- a/packages/tailwindcss/src/utils/decode-arbitrary-value.ts +++ b/packages/tailwindcss/src/utils/decode-arbitrary-value.ts @@ -22,13 +22,13 @@ const UNDERSCORE = 0x5f */ function convertUnderscoresToWhitespace(input: string) { let output = '' - for (let i = 0; i < input.length; i++) { - let char = input.charCodeAt(i) + for (let idx = 0; idx < input.length; idx++) { + let char = input.charCodeAt(idx) // Escaped underscore - if (char === BACKSLASH && input.charCodeAt(i + 1) === UNDERSCORE) { + if (char === BACKSLASH && input.charCodeAt(idx + 1) === UNDERSCORE) { output += '_' - i += 1 + idx += 1 } // Unescaped underscore @@ -38,7 +38,7 @@ function convertUnderscoresToWhitespace(input: string) { // All other characters else { - output += input[i] + output += input[idx] } } From 9071adbcc664b1e65b33a4e6bd516a4a2cdf141c Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Wed, 17 Apr 2024 16:39:00 +0200 Subject: [PATCH 4/6] improve `convertUnderscoresToWhitespace` We will now ensure that: 1. Dashed idents are consumed as-is, meaning that underscores (which are valid in dashed-idents) are not converted to a space. 2. Strings are handled separately, which means that values that look like dashed idents containing underscores are still converted to spaces. --- .../src/utils/decode-arbitrary-value.ts | 107 ++++++++++++++++-- 1 file changed, 96 insertions(+), 11 deletions(-) diff --git a/packages/tailwindcss/src/utils/decode-arbitrary-value.ts b/packages/tailwindcss/src/utils/decode-arbitrary-value.ts index ed75fb73b084..15fb46d78b8e 100644 --- a/packages/tailwindcss/src/utils/decode-arbitrary-value.ts +++ b/packages/tailwindcss/src/utils/decode-arbitrary-value.ts @@ -1,5 +1,17 @@ import { addWhitespaceAroundMathOperators } from './math-operators' +const BACKSLASH = 0x5c +const UNDERSCORE = 0x5f +const DASH = 0x2d +const DOUBLE_QUOTE = 0x22 +const SINGLE_QUOTE = 0x27 +const LOWER_A = 0x61 +const LOWER_Z = 0x7a +const UPPER_A = 0x41 +const UPPER_Z = 0x5a +const ZERO = 0x30 +const NINE = 0x39 + export function decodeArbitraryValue(input: string): string { // We do not want to normalize anything inside of a url() because if we // replace `_` with ` `, then it will very likely break the url. @@ -13,29 +25,102 @@ export function decodeArbitraryValue(input: string): string { return input } -const BACKSLASH = 0x5c -const UNDERSCORE = 0x5f - /** - * Convert `_` to ` `, except for escaped underscores `\_` they should be - * converted to `_` instead. + * Convert underscores `_` to whitespace ` ` + * + * Except for: + * + * - Escaped underscores `\_`, these are converted to underscores `_` + * - Dashed idents `--foo_bar`, these are left as-is + * + * Inside strings, dashed idents are considered to be normal strings without any + * special meaning, so the `_` in "dashed idents" are converted to whitespace. */ function convertUnderscoresToWhitespace(input: string) { let output = '' - for (let idx = 0; idx < input.length; idx++) { + let len = input.length + + for (let idx = 0; idx < len; idx++) { let char = input.charCodeAt(idx) - // Escaped underscore - if (char === BACKSLASH && input.charCodeAt(idx + 1) === UNDERSCORE) { - output += '_' - idx += 1 + // Escaped character, consume the next character as-is + if (char === BACKSLASH) { + output += input[++idx] } - // Unescaped underscore + // Underscores are converted to whitespace else if (char === UNDERSCORE) { output += ' ' } + // Start of a dashed ident, consume the ident as-is + else if (char === DASH && input.charCodeAt(idx + 1) === DASH) { + let start = idx + + // Skip the first two dashes, we already know they are there + idx += 2 + + char = input.charCodeAt(idx) + while ( + (char >= LOWER_A && char <= LOWER_Z) || + (char >= UPPER_A && char <= UPPER_Z) || + (char >= ZERO && char <= NINE) || + char === DASH || + char === UNDERSCORE || + char === BACKSLASH + ) { + // Escaped value, consume the next character as-is + if (char === BACKSLASH) { + // In theory, we can also escape a unicode code point where 1 to 6 hex + // digits are allowed after the \. However, each hex digit is also a + // valid ident character, so we can just consume the next character + // as-is and go to the next character. + idx += 1 + } + + // Next character + char = input.charCodeAt(++idx) + } + + output += input.slice(start, idx) + + // The last character was not a valid ident character, so we need to back + // up one character. + idx -= 1 + } + + // Start of a string + else if (char === SINGLE_QUOTE || char === DOUBLE_QUOTE) { + let quote = input[idx++] + + // Keep the quote + output += quote + + // Consume to the end of the string, but replace any non-escaped + // underscores with spaces. + while (idx < len && input.charCodeAt(idx) !== char) { + // Escaped character, consume the next character as-is + if (input.charCodeAt(idx) === BACKSLASH) { + output += input[++idx] + } + + // Unescaped underscore + else if (input.charCodeAt(idx) === UNDERSCORE) { + output += ' ' + } + + // All other characters + else { + output += input[idx] + } + + idx += 1 + } + + // Keep the end quote + output += quote + } + // All other characters else { output += input[idx] From 555a33e88b668b4579c3caf54ca1499ba4d6da69 Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Wed, 17 Apr 2024 17:06:42 +0200 Subject: [PATCH 5/6] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 40baa4c42bbd..bfe8bbeb97f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Make sure `contain-*` utility variables resolve to a valid value ([#13521](https://github.com/tailwindlabs/tailwindcss/pull/13521)) - Support unbalanced parentheses and braces in quotes in arbitrary values and variants ([#13608](https://github.com/tailwindlabs/tailwindcss/pull/13608)) +- Keep underscores in dashed-idents ([#13538](https://github.com/tailwindlabs/tailwindcss/pull/13538)) ### Changed From f539b2c99cac572709af66179225a59ce2f9ea86 Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Wed, 8 May 2024 13:17:50 +0200 Subject: [PATCH 6/6] convert `\_` -> `_` But keep any other escaped value as-is (including escape character) --- .../src/utils/decode-arbitrary-value.ts | 32 ++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/packages/tailwindcss/src/utils/decode-arbitrary-value.ts b/packages/tailwindcss/src/utils/decode-arbitrary-value.ts index 15fb46d78b8e..e1f6d708fc82 100644 --- a/packages/tailwindcss/src/utils/decode-arbitrary-value.ts +++ b/packages/tailwindcss/src/utils/decode-arbitrary-value.ts @@ -43,9 +43,20 @@ function convertUnderscoresToWhitespace(input: string) { for (let idx = 0; idx < len; idx++) { let char = input.charCodeAt(idx) - // Escaped character, consume the next character as-is - if (char === BACKSLASH) { - output += input[++idx] + // Escaped values + if (input.charCodeAt(idx) === BACKSLASH) { + // An escaped underscore (e.g.: `\_`) is converted to a non-escaped + // underscore, but without converting the `_` to a space. + if (input.charCodeAt(idx + 1) === UNDERSCORE) { + output += '_' + idx += 1 + } + + // Consume the backslash and the next character as-is + else { + output += input.slice(idx, idx + 2) + idx += 1 + } } // Underscores are converted to whitespace @@ -99,9 +110,20 @@ function convertUnderscoresToWhitespace(input: string) { // Consume to the end of the string, but replace any non-escaped // underscores with spaces. while (idx < len && input.charCodeAt(idx) !== char) { - // Escaped character, consume the next character as-is + // Escaped values if (input.charCodeAt(idx) === BACKSLASH) { - output += input[++idx] + // An escaped underscore (e.g.: `\_`) is converted to a non-escaped + // underscore, but without converting the `_` to a space. + if (input.charCodeAt(idx + 1) === UNDERSCORE) { + output += '_' + idx += 1 + } + + // Consume the backslash and the next character as-is + else { + output += input.slice(idx, idx + 2) + idx += 1 + } } // Unescaped underscore