From 50e160441e04c2b6dda790479641c8888ce34f88 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sat, 22 Jun 2024 12:35:17 -0400 Subject: [PATCH] Improve a couple of regex source generator oddities (#103851) * Avoid generating a useless IndexOfAny When we can't efficiently enumerate a character class and it's not just a single range, we fall back to outputting a helper IndexOfXx method. This method does an optimized search for either any ASCII member of the set or anything non-ASCII, and then falls back to walking the items one-by-one. This makes sense, unless all of ASCII is in the set, in which case this is a meaningless IndexOfAny call because it's just going to always return 0 or -1. We should avoid emitting an IndexOfAny call in such a case. * This helper is emitting a call to IndexOfAnyExceptInRange, passing in the full ASCII range. The intent was for this to literally output the text `IndexOfAnyExceptInRange('\0', '\u007f')` into the generated C#, but because those are single backslashes, it's actually outputting the characters (char)0x0 and (char)0x7F. That's functionally correct, but it's not what was intended and makes the code harder to read. The fix is just to put in the missing backslashes. --- .../gen/RegexGenerator.Emitter.cs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 33755cfd62c8a..12014b3fd15e9 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -547,16 +547,19 @@ private static string EmitIndexOfAnyCustomHelper(string set, Dictionary span)"); lines.Add($"{{"); int uncheckedStart = lines.Count; - lines.Add(excludedAsciiChars.Count == 128 ? 
- $" int i = span.IndexOfAnyExceptInRange('\0', '\u007f');" : + lines.Add(excludedAsciiChars.Count == 128 ? $" int i = span.IndexOfAnyExceptInRange('\\0', '\\u007f');" : // no ASCII is in the set + excludedAsciiChars.Count == 0 ? $" int i = 0;" : // all ASCII is in the set $" int i = span.IndexOfAnyExcept({EmitSearchValues(excludedAsciiChars.ToArray(), requiredHelpers)});"); lines.Add($" if ((uint)i < (uint)span.Length)"); lines.Add($" {{"); - lines.Add($" if (char.IsAscii(span[i]))"); - lines.Add($" {{"); - lines.Add($" return i;"); - lines.Add($" }}"); - lines.Add($""); + if (excludedAsciiChars.Count is not (0 or 128)) + { + lines.Add($" if (char.IsAscii(span[i]))"); + lines.Add($" {{"); + lines.Add($" return i;"); + lines.Add($" }}"); + lines.Add($""); + } if (additionalDeclarations.Count > 0) { lines.AddRange(additionalDeclarations.Select(s => $" {s}"));