-
Notifications
You must be signed in to change notification settings - Fork 1.4k
/
FileMatcher.cs
2695 lines (2432 loc) · 117 KB
/
FileMatcher.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
using System;
using System.Collections.Concurrent;
using System.IO;
using System.Text;
using System.Diagnostics;
using System.Linq;
using System.Text.RegularExpressions;
using System.Collections.Generic;
using System.Threading.Tasks;
using Microsoft.Build.Framework;
using Microsoft.Build.Shared.FileSystem;
#nullable disable
namespace Microsoft.Build.Shared
{
/// <summary>
/// Functions for matching file names with patterns.
/// </summary>
internal class FileMatcher
{
// File system abstraction stored by the constructor for use by this matcher.
private readonly IFileSystem _fileSystem;
// The recursive wildcard token; SplitFileSpec treats a filename part equal to this specially.
private const string recursiveDirectoryMatch = "**";
// The platform's primary directory separator as a one-character string.
private static readonly string s_directorySeparator = new string(Path.DirectorySeparatorChar, 1);
// "." followed by the directory separator, i.e. the "current directory" prefix.
private static readonly string s_thisDirectory = "." + s_directorySeparator;
// Characters that make a filespec a wildcard pattern (see HasWildcards).
private static readonly char[] s_wildcardCharacters = { '*', '?' };
// Wildcard characters plus the semicolon (see HasWildcardsSemicolonItemOrPropertyReferences).
private static readonly char[] s_wildcardAndSemicolonCharacters = { '*', '?', ';' };
// Opening tokens of property ("$(") and item ("@(") references (see HasPropertyOrItemReferences).
private static readonly string[] s_propertyAndItemReferences = { "$(", "@(" };
// on OSX both System.IO.Path separators are '/', so we have to use the literals
internal static readonly char[] directorySeparatorCharacters = FileUtilities.Slashes;
// until Cloudbuild switches to EvaluationContext, we need to keep their dependence on global glob caching via an environment variable
// Both static caches are created lazily and keyed case-insensitively (StringComparer.OrdinalIgnoreCase).
private static readonly Lazy<ConcurrentDictionary<string, IReadOnlyList<string>>> s_cachedGlobExpansions = new Lazy<ConcurrentDictionary<string, IReadOnlyList<string>>>(() => new ConcurrentDictionary<string, IReadOnlyList<string>>(StringComparer.OrdinalIgnoreCase));
// NOTE(review): presumably holds one lock object per cache key; its use is outside this part of the file.
private static readonly Lazy<ConcurrentDictionary<string, object>> s_cachedGlobExpansionsLock = new Lazy<ConcurrentDictionary<string, object>>(() => new ConcurrentDictionary<string, object>(StringComparer.OrdinalIgnoreCase));
// Set by the constructor to either the static cache above or the cache passed in by the caller (may be null).
private readonly ConcurrentDictionary<string, IReadOnlyList<string>> _cachedGlobExpansions;
// Per-instance lock table; the constructor replaces it with the static one when the static cache is selected.
private readonly Lazy<ConcurrentDictionary<string, object>> _cachedGlobExpansionsLock = new Lazy<ConcurrentDictionary<string, object>>(() => new ConcurrentDictionary<string, object>(StringComparer.OrdinalIgnoreCase));
/// <summary>
/// Cache of the list of invalid path characters, because this method returns a clone (for security reasons)
/// which can cause significant transient allocations
/// </summary>
private static readonly char[] s_invalidPathChars = Path.GetInvalidPathChars();
// Default options for regular expressions built from filespecs: matching ignores case.
public const RegexOptions DefaultRegexOptions = RegexOptions.IgnoreCase;
// Delegate used for every directory enumeration; the constructor may wrap it with a caching layer.
private readonly GetFileSystemEntries _getFileSystemEntries;
/// <summary>
/// Regular expression fragments from which a filespec-matching regular expression is assembled.
/// </summary>
private static class FileSpecRegexParts
{
// Anchors the match at the start of the input.
internal const string BeginningOfLine = "^";
// Opens the named capture group WILDCARDDIR.
internal const string WildcardGroupStart = "(?<WILDCARDDIR>";
// Opens the named capture group FILENAME.
internal const string FilenameGroupStart = "(?<FILENAME>";
// Closes a capture group.
internal const string GroupEnd = ")";
// Anchors the match at the end of the input.
internal const string EndOfLine = "$";
// Zero or more characters that are neither '/' nor '\'.
internal const string AnyNonSeparator = @"[^/\\]*";
// NOTE(review): despite the name, this pattern consumes two characters (one non-dot character followed by any character) - confirm this is intended.
internal const string AnySingleCharacterButDot = @"[^\.].";
// Zero or more characters that are not '.'.
internal const string AnythingButDot = @"[^\.]*";
// One or more consecutive '/' or '\' characters.
internal const string DirSeparator = @"[/\\]+";
// Any text ending in '/' or '\', or nothing at all.
internal const string LeftDirs = @"((.*/)|(.*\\)|())";
// A single separator, or a separator followed by any text and another separator (either slash kind on each side).
internal const string MiddleDirs = @"((/)|(\\)|(/.*/)|(/.*\\)|(\\.*\\)|(\\.*/))";
// Any single character.
internal const string SingleCharacter = ".";
// Two literal backslashes, as found at the start of a UNC path.
internal const string UncSlashSlash = @"\\\\";
}
/*
* Sum of the lengths of the fixed fragments:
* FileSpecRegexParts.BeginningOfLine.Length + FileSpecRegexParts.WildcardGroupStart.Length + FileSpecRegexParts.GroupEnd.Length
+ FileSpecRegexParts.FilenameGroupStart.Length + FileSpecRegexParts.GroupEnd.Length + FileSpecRegexParts.EndOfLine.Length;
* i.e. 1 + 15 + 1 + 12 + 1 + 1 = 31. Keep this value in sync if any of those fragments change.
*/
private const int FileSpecRegexMinLength = 31;
/// <summary>
/// The Default FileMatcher does not cache directory enumeration.
/// It is built on <c>FileSystems.Default</c> and is given a null directory-entry cache.
/// </summary>
public static FileMatcher Default = new FileMatcher(FileSystems.Default, null);
/// <summary>
/// Creates a matcher that enumerates entries through <paramref name="fileSystem"/> using the
/// accessible-entries helpers of this class.
/// </summary>
/// <param name="fileSystem">The file system abstraction used for every enumeration.</param>
/// <param name="fileEntryExpansionCache">Optional cache handed on to the internal constructor; may be null.</param>
public FileMatcher(IFileSystem fileSystem, ConcurrentDictionary<string, IReadOnlyList<string>> fileEntryExpansionCache = null)
    : this(
        fileSystem,
        (kind, directory, searchPattern, projectDir, strip) =>
            GetAccessibleFileSystemEntries(fileSystem, kind, directory, searchPattern, projectDir, strip).ToArray(),
        fileEntryExpansionCache)
{
}
/// <summary>
/// Creates a matcher that enumerates through the supplied delegate, optionally placing a cache of
/// directory listings in front of it.
/// </summary>
/// <param name="fileSystem">The file system abstraction stored on the matcher.</param>
/// <param name="getFileSystemEntries">Delegate that performs the actual enumeration.</param>
/// <param name="getFileSystemDirectoryEntriesCache">
/// Optional cache of directory listings. When null the delegate is used as-is; otherwise each directory is
/// enumerated with the "*" pattern, the listing is cached, and pattern filtering happens in memory.
/// </param>
internal FileMatcher(IFileSystem fileSystem, GetFileSystemEntries getFileSystemEntries, ConcurrentDictionary<string, IReadOnlyList<string>> getFileSystemDirectoryEntriesCache = null)
{
    // When the trait is on, glob expansions go to the static cache and its lock table;
    // otherwise the caller's cache (possibly null) is used and the per-instance lock table stays.
    bool useStaticGlobCache = Traits.Instance.MSBuildCacheFileEnumerations;
    _cachedGlobExpansions = useStaticGlobCache ? s_cachedGlobExpansions.Value : getFileSystemDirectoryEntriesCache;
    if (useStaticGlobCache)
    {
        _cachedGlobExpansionsLock = s_cachedGlobExpansionsLock;
    }

    _fileSystem = fileSystem;

    if (getFileSystemDirectoryEntriesCache == null)
    {
        _getFileSystemEntries = getFileSystemEntries;
        return;
    }

    _getFileSystemEntries = (entityType, searchPath, searchPattern, projectDirectory, stripProjectDirectory) =>
    {
        // Always hit the filesystem with "*" pattern, cache the results, and do the filtering here.
        string entityTag;
        switch (entityType)
        {
            case FileSystemEntity.Files:
                entityTag = "F";
                break;
            case FileSystemEntity.Directories:
                entityTag = "D";
                break;
            case FileSystemEntity.FilesAndDirectories:
                entityTag = "A";
                break;
            default:
                throw new NotImplementedException();
        }

        string cacheKey = entityTag + ";" + searchPath;
        IReadOnlyList<string> cachedListing = getFileSystemDirectoryEntriesCache.GetOrAdd(
            cacheKey,
            key => getFileSystemEntries(entityType, searchPath, "*", projectDirectory, false));

        IEnumerable<string> matches = cachedListing;
        if (searchPattern != null && !IsAllFilesWildcard(searchPattern))
        {
            matches = cachedListing.Where(entry => IsFileNameMatch(entry, searchPattern));
        }

        if (stripProjectDirectory)
        {
            matches = RemoveProjectDirectory(matches, projectDirectory);
        }

        return matches.ToArray();
    };
}
/// <summary>
/// The type of entity that GetFileSystemEntries should return.
/// </summary>
internal enum FileSystemEntity
{
// Return files only.
Files,
// Return directories only.
Directories,
// Return both files and directories.
FilesAndDirectories
};
/// <summary>
/// Delegate defines the GetFileSystemEntries signature that GetLongPathName uses
/// to enumerate directories on the file system.
/// </summary>
/// <param name="entityType">Files, Directories, or Files and Directories</param>
/// <param name="path">The path to search.</param>
/// <param name="pattern">The file pattern.</param>
/// <param name="projectDirectory">The project directory; it is the prefix removed from results when <paramref name="stripProjectDirectory"/> is true. GetLongPathName passes null here.</param>
/// <param name="stripProjectDirectory">If true, the project directory prefix is removed from the returned paths.</param>
/// <returns>A list of filesystem entries.</returns>
internal delegate IReadOnlyList<string> GetFileSystemEntries(FileSystemEntity entityType, string path, string pattern, string projectDirectory, bool stripProjectDirectory);
/// <summary>
/// Empties the static glob-expansion cache and its lock table, but only those that have already been
/// created, so that clearing never forces a lazy dictionary into existence.
/// </summary>
internal static void ClearFileEnumerationsCache()
{
    void ClearIfCreated<TValue>(Lazy<ConcurrentDictionary<string, TValue>> lazyCache)
    {
        if (lazyCache.IsValueCreated)
        {
            lazyCache.Value.Clear();
        }
    }

    ClearIfCreated(s_cachedGlobExpansions);
    ClearIfCreated(s_cachedGlobExpansionsLock);
}
/// <summary>
/// Tells whether the given filespec contains a '*' or '?' wildcard character.
/// </summary>
/// <param name="filespec">The path or pattern to inspect.</param>
/// <returns>True if at least one wildcard character is present.</returns>
internal static bool HasWildcards(string filespec)
{
    // Perf: a character scan is much cheaper than a compiled regular expression for this check.
    // The scan runs from the end because wildcards are more likely to sit towards the right side.
    int lastWildcardIndex = filespec.LastIndexOfAny(s_wildcardCharacters);
    return lastWildcardIndex >= 0;
}
/// <summary>
/// Tells whether the given filespec contains a wildcard, a semicolon, or a property or item reference.
/// </summary>
/// <param name="filespec">The path or pattern to inspect.</param>
/// <returns>True if any of those constructs is present.</returns>
internal static bool HasWildcardsSemicolonItemOrPropertyReferences(string filespec)
{
    if (filespec.IndexOfAny(s_wildcardAndSemicolonCharacters) >= 0)
    {
        return true;
    }

    return HasPropertyOrItemReferences(filespec);
}
/// <summary>
/// Tells whether the given filespec contains the opening token of a property reference ("$(") or
/// of an item reference ("@(").
/// </summary>
/// <param name="filespec">The path or pattern to inspect.</param>
/// <returns>True if either token occurs anywhere in the filespec.</returns>
internal static bool HasPropertyOrItemReferences(string filespec)
{
    foreach (string referenceStart in s_propertyAndItemReferences)
    {
        if (filespec.Contains(referenceStart))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Returns the files and/or directories under <paramref name="path"/> that match
/// <paramref name="pattern"/>, dispatching on the requested entity type.
/// </summary>
/// <param name="fileSystem">The file system abstraction to use that implements file system operations</param>
/// <param name="entityType">Whether files, directories or both are wanted.</param>
/// <param name="path">The path to search; it is normalized with FixFilePath first.</param>
/// <param name="pattern">The pattern to search.</param>
/// <param name="projectDirectory">The directory of the project making the call; only used for files.</param>
/// <param name="stripProjectDirectory">If true the project directory is stripped from results; only used for files.</param>
/// <returns>The matching entries.</returns>
private static IReadOnlyList<string> GetAccessibleFileSystemEntries(IFileSystem fileSystem, FileSystemEntity entityType, string path, string pattern, string projectDirectory, bool stripProjectDirectory)
{
    path = FileUtilities.FixFilePath(path);

    if (entityType == FileSystemEntity.Files)
    {
        return GetAccessibleFiles(fileSystem, path, pattern, projectDirectory, stripProjectDirectory);
    }

    if (entityType == FileSystemEntity.Directories)
    {
        return GetAccessibleDirectories(fileSystem, path, pattern);
    }

    if (entityType == FileSystemEntity.FilesAndDirectories)
    {
        return GetAccessibleFilesAndDirectories(fileSystem, path, pattern);
    }

    // Any other enum value is a programming error.
    ErrorUtilities.ThrowInternalError("Unexpected filesystem entity type.");
    return Array.Empty<string>();
}
/// <summary>
/// Lists the files and directories under <paramref name="path"/> that match
/// <paramref name="pattern"/>. A missing directory, or one that cannot be read for security reasons,
/// produces an empty result instead of an exception.
/// </summary>
/// <param name="fileSystem">The file system abstraction to use that implements file system operations</param>
/// <param name="path">The directory to enumerate.</param>
/// <param name="pattern">The search pattern handed to the file system.</param>
/// <returns>The matching file system entries (can be empty).</returns>
private static IReadOnlyList<string> GetAccessibleFilesAndDirectories(IFileSystem fileSystem, string path, string pattern)
{
    if (!fileSystem.DirectoryExists(path))
    {
        return Array.Empty<string>();
    }

    try
    {
        // Decide first whether the file system's loose matching has to be tightened.
        bool enforceExactMatch = ShouldEnforceMatching(pattern);
        IEnumerable<string> entries = fileSystem.EnumerateFileSystemEntries(path, pattern);
        if (enforceExactMatch)
        {
            entries = entries.Where(entry => IsFileNameMatch(entry, pattern));
        }

        return entries.ToArray();
    }
    catch (UnauthorizedAccessException)
    {
        // OS security: treat as "nothing found".
        return Array.Empty<string>();
    }
    catch (System.Security.SecurityException)
    {
        // Code access security: treat as "nothing found".
        return Array.Empty<string>();
    }
}
/// <summary>
/// Decides whether results for the given search pattern need an extra exact-match filter because
/// the underlying enumeration matches such patterns loosely on Windows.
/// </summary>
/// <remarks>
/// See https://github.com/dotnet/msbuild/issues/3060. Corefx matches loosely in three cases
/// (in the absence of the * wildcard in the extension):
/// 1) the extension ends with '?': "file.tx?" also matches "file.tx";
/// 2) the extension is three characters and the pattern contains '*': "*.htm" also matches "file.html";
/// 3) a '?' sits directly left of a period: "???.txt" also matches "fo.txt" and "f.txt".
/// </remarks>
/// <param name="searchPattern">The search pattern to check; may be null.</param>
/// <returns>True if the caller should re-check every result against the pattern.</returns>
private static bool ShouldEnforceMatching(string searchPattern)
{
    if (searchPattern == null)
    {
        return false;
    }

    // Case 3: '?' immediately before a period.
    if (searchPattern.IndexOf("?.", StringComparison.Ordinal) != -1)
    {
        return true;
    }

    // Case 2: three-character extension (four with the period) together with a '*'.
    const int ThreeCharExtensionLength = 3 + 1;
    if (Path.GetExtension(searchPattern).Length == ThreeCharExtensionLength && searchPattern.IndexOf('*') != -1)
    {
        return true;
    }

    // Case 1: pattern ends with '?'.
    return searchPattern.EndsWith("?", StringComparison.Ordinal);
}
/// <summary>
/// Enumerates the files in a directory, returning an empty result instead of throwing when access
/// is denied for security reasons. Other exceptions are passed through.
/// </summary>
/// <param name="fileSystem">The file system abstraction to use that implements file system operations</param>
/// <param name="path">The directory to search; an empty string means the current directory.</param>
/// <param name="filespec">The pattern to match, or null to return every file.</param>
/// <param name="projectDirectory">The project directory</param>
/// <param name="stripProjectDirectory">If true, the project directory prefix is removed from each result.</param>
/// <returns>Files that can be accessed.</returns>
private static IReadOnlyList<string> GetAccessibleFiles(
    IFileSystem fileSystem,
    string path,
    string filespec, // can be null
    string projectDirectory,
    bool stripProjectDirectory)
{
    try
    {
        // An empty path means "look in the current directory".
        string searchRoot = path.Length == 0 ? s_thisDirectory : path;

        IEnumerable<string> matches;
        if (filespec != null)
        {
            matches = fileSystem.EnumerateFiles(searchRoot, filespec);
            if (ShouldEnforceMatching(filespec))
            {
                matches = matches.Where(candidate => IsFileNameMatch(candidate, filespec));
            }
        }
        else
        {
            matches = fileSystem.EnumerateFiles(searchRoot);
        }

        if (stripProjectDirectory)
        {
            // Items based on a relative path must not carry the project directory in front.
            matches = RemoveProjectDirectory(matches, projectDirectory);
        }
        else if (!path.StartsWith(s_thisDirectory, StringComparison.Ordinal))
        {
            // Files in the current directory come back with a ".\" prepended, which breaks the IDE
            // (it expects just the filename). Remove it, unless the requested path itself began with ".\".
            matches = RemoveInitialDotSlash(matches);
        }

        return matches.ToArray();
    }
    catch (Exception ex) when (ex is System.Security.SecurityException || ex is System.UnauthorizedAccessException)
    {
        // Code access security or OS security: report no files.
        return Array.Empty<string>();
    }
}
/// <summary>
/// Enumerates the subdirectories of a directory, returning an empty result instead of throwing when
/// access is denied for security reasons. Other exceptions are passed through.
/// </summary>
/// <param name="fileSystem">The file system abstraction to use that implements file system operations</param>
/// <param name="path">The directory to search; an empty string means the current directory.</param>
/// <param name="pattern">Pattern to match, or null to return every subdirectory.</param>
/// <returns>Accessible directories.</returns>
private static IReadOnlyList<string> GetAccessibleDirectories(
    IFileSystem fileSystem,
    string path,
    string pattern)
{
    try
    {
        string searchRoot = path.Length == 0 ? s_thisDirectory : path;

        IEnumerable<string> found;
        if (pattern != null)
        {
            found = fileSystem.EnumerateDirectories(searchRoot, pattern);
            if (ShouldEnforceMatching(pattern))
            {
                found = found.Where(candidate => IsFileNameMatch(candidate, pattern));
            }
        }
        else
        {
            found = fileSystem.EnumerateDirectories(searchRoot);
        }

        // Subdirectories of the current directory come back with a ".\" prepended, which breaks the IDE
        // (it expects just the name). Remove it, unless the requested path itself began with ".\".
        bool callerUsedDotSlash = path.StartsWith(s_thisDirectory, StringComparison.Ordinal);
        if (!callerUsedDotSlash)
        {
            found = RemoveInitialDotSlash(found);
        }

        return found.ToArray();
    }
    catch (Exception ex) when (ex is System.Security.SecurityException || ex is System.UnauthorizedAccessException)
    {
        // Code access security or OS security: report no directories.
        return Array.Empty<string>();
    }
}
/// <summary>
/// Expands a possibly short (8.3-style) path to its long form, using this matcher's own
/// file system enumeration delegate.
/// </summary>
/// <param name="path">The short path.</param>
/// <returns>The long path.</returns>
internal string GetLongPathName(string path) => GetLongPathName(path, _getFileSystemEntries);
/// <summary>
/// Given a path name, get its long version.
/// Paths without a '~' are returned unchanged without touching the disk. Otherwise each path segment
/// that contains a '~' is looked up through <paramref name="getFileSystemEntries"/> and replaced by the
/// name of the single matching entry. Once a segment cannot be found, the remaining segments are kept as given.
/// </summary>
/// <param name="path">The short path. Must not contain wildcards.</param>
/// <param name="getFileSystemEntries">Delegate used to enumerate the entries matching a segment.</param>
/// <returns>The long path.</returns>
internal static string GetLongPathName
(
string path,
GetFileSystemEntries getFileSystemEntries
)
{
if (path.IndexOf("~", StringComparison.Ordinal) == -1)
{
// A path with no '~' must not be a short name.
return path;
}
ErrorUtilities.VerifyThrow(!HasWildcards(path),
"GetLongPathName does not handle wildcards and was passed '{0}'.", path);
string[] parts = path.Split(directorySeparatorCharacters);
string pathRoot;
bool isUnc = path.StartsWith(s_directorySeparator + s_directorySeparator, StringComparison.Ordinal);
int startingElement;
if (isUnc)
{
// The two leading separators produce two empty parts, so parts[2] and parts[3] are the first two
// named segments; they are copied into the root verbatim and never expanded.
// NOTE(review): parts[2] and parts[3] are read without a length check - a UNC-style path with fewer
// than two named segments would throw here; confirm callers never pass one.
pathRoot = s_directorySeparator + s_directorySeparator;
pathRoot += parts[2];
pathRoot += s_directorySeparator;
pathRoot += parts[3];
pathRoot += s_directorySeparator;
startingElement = 4;
}
else
{
// Is it relative?
if (path.Length > 2 && path[1] == ':')
{
// Not relative
pathRoot = parts[0] + s_directorySeparator;
startingElement = 1;
}
else
{
// Relative
pathRoot = string.Empty;
startingElement = 0;
}
}
// Build up an array of parts. These elements may be "" if there are
// extra slashes.
string[] longParts = new string[parts.Length - startingElement];
// longPath tracks the expanded path so far; it is the directory searched for the next short segment.
string longPath = pathRoot;
for (int i = startingElement; i < parts.Length; ++i)
{
// If there is a zero-length part, then that means there was an extra slash.
if (parts[i].Length == 0)
{
longParts[i - startingElement] = string.Empty;
}
else
{
if (parts[i].IndexOf("~", StringComparison.Ordinal) == -1)
{
// If there's no ~, don't hit the disk.
longParts[i - startingElement] = parts[i];
longPath = Path.Combine(longPath, parts[i]);
}
else
{
// An empty result from getFileSystemEntries(...) is treated as "this part does not exist".
IReadOnlyList<string> entries = getFileSystemEntries(FileSystemEntity.FilesAndDirectories, longPath, parts[i], null, false);
if (0 == entries.Count)
{
// The next part doesn't exist. Therefore, no more of the path will exist.
// Just return the rest.
for (int j = i; j < parts.Length; ++j)
{
longParts[j - startingElement] = parts[j];
}
break;
}
// Since we know there are no wild cards, exactly one entry is expected.
ErrorUtilities.VerifyThrow(entries.Count == 1,
"Unexpected number of entries ({3}) found when enumerating '{0}' under '{1}'. Original path was '{2}'",
parts[i], longPath, path, entries.Count);
// Entries[0] contains the full path.
longPath = entries[0];
// We just want the trailing node.
longParts[i - startingElement] = Path.GetFileName(longPath);
}
}
}
return pathRoot + string.Join(s_directorySeparator, longParts);
}
/// <summary>
/// Splits a filespec into its left-most fixed directory part, its middle wildcard directory part and
/// its filename part. The filename part may itself contain wildcard characters.
/// </summary>
/// <param name="filespec">The filespec to be decomposed.</param>
/// <param name="fixedDirectoryPart">Receives the fixed directory part, expanded to its long path name.</param>
/// <param name="wildcardDirectoryPart">Receives the wildcard directory part.</param>
/// <param name="filenamePart">Receives the filename part.</param>
internal void SplitFileSpec(
    string filespec,
    out string fixedDirectoryPart,
    out string wildcardDirectoryPart,
    out string filenamePart)
{
    PreprocessFileSpecForSplitting(filespec, out fixedDirectoryPart, out wildcardDirectoryPart, out filenamePart);

    // Special case: a filename of '**' really means "any file, recursively". Move the '**' to the end of
    // the wildcard directory part and match every filename, so that later regular expression matching
    // can accurately pull the fixed, wildcard and filename parts out of given file specs.
    bool filenameIsRecursiveWildcard = filenamePart == recursiveDirectoryMatch;
    if (filenameIsRecursiveWildcard)
    {
        wildcardDirectoryPart = wildcardDirectoryPart + recursiveDirectoryMatch + s_directorySeparator;
        filenamePart = "*.*";
    }

    fixedDirectoryPart = GetLongPathName(fixedDirectoryPart, _getFileSystemEntries);
}
/// <summary>
/// Does the grunt work of splitting a filespec into fixed directory, wildcard directory and filename
/// parts. Post-processing shared by the different matching paths is left to the callers.
/// </summary>
/// <param name="filespec">The filespec to be decomposed.</param>
/// <param name="fixedDirectoryPart">Receives the fixed directory part.</param>
/// <param name="wildcardDirectoryPart">Receives the wildcard directory part.</param>
/// <param name="filenamePart">Receives the filename part.</param>
private static void PreprocessFileSpecForSplitting(
    string filespec,
    out string fixedDirectoryPart,
    out string wildcardDirectoryPart,
    out string filenamePart)
{
    filespec = FileUtilities.FixFilePath(filespec);

    int lastSeparator = filespec.LastIndexOfAny(directorySeparatorCharacters);
    if (lastSeparator == -1)
    {
        // No directory at all. Shapes: "Source.cs", "*.cs" or "**".
        fixedDirectoryPart = string.Empty;
        wildcardDirectoryPart = string.Empty;
        filenamePart = filespec;
        return;
    }

    // In every remaining shape the filename is whatever follows the last separator.
    int directoryLength = lastSeparator + 1;
    filenamePart = filespec.Substring(directoryLength);

    int firstWildcard = filespec.IndexOfAny(s_wildcardCharacters);
    bool wildcardInDirectory = firstWildcard != -1 && firstWildcard <= lastSeparator;
    if (!wildcardInDirectory)
    {
        // Either there is no wildcard, or it only appears in the filename.
        // Shapes: "dir1\Source.cs", "dir1\*.cs", or "dir1\**" (any file, recursively).
        fixedDirectoryPart = filespec.Substring(0, directoryLength);
        wildcardDirectoryPart = string.Empty;
        return;
    }

    // Find the separator right before the first wildcard.
    int separatorBeforeWildcard = filespec.Substring(0, firstWildcard).LastIndexOfAny(directorySeparatorCharacters);
    if (separatorBeforeWildcard == -1)
    {
        // The very first directory segment already has a wildcard. Shapes: "dir?\Source.cs", "dir?\**".
        fixedDirectoryPart = string.Empty;
        wildcardDirectoryPart = filespec.Substring(0, directoryLength);
    }
    else
    {
        // General shape: fixed directories, then wildcard directories, then the filename.
        int fixedLength = separatorBeforeWildcard + 1;
        fixedDirectoryPart = filespec.Substring(0, fixedLength);
        wildcardDirectoryPart = filespec.Substring(fixedLength, lastSeparator - separatorBeforeWildcard);
    }
}
/// <summary>
/// Lazily yields each path with a leading "current directory" prefix ("." followed by the directory
/// separator) removed; paths without that prefix are yielded unchanged.
/// </summary>
/// <param name="paths">Paths to remove the prefix from.</param>
private static IEnumerable<string> RemoveInitialDotSlash(IEnumerable<string> paths)
{
    foreach (string entry in paths)
    {
        bool hasDotSlashPrefix = entry.StartsWith(s_thisDirectory, StringComparison.Ordinal);

        // The prefix is two characters long: the dot and the separator.
        yield return hasDotSlashPrefix ? entry.Substring(2) : entry;
    }
}
/// <summary>
/// Tells whether a character is one of the platform's directory separators
/// (<see cref="Path.DirectorySeparatorChar"/> or <see cref="Path.AltDirectorySeparatorChar"/>).
/// </summary>
/// <param name="c">The character to test.</param>
/// <returns>True if the character is a directory separator.</returns>
internal static bool IsDirectorySeparator(char c)
    => c == Path.DirectorySeparatorChar || c == Path.AltDirectorySeparatorChar;
/// <summary>
/// Lazily converts paths back to project-relative form by removing a leading project directory.
/// Paths that do not start with the project directory, or that merely share it as a name prefix
/// (the next character is not a separator), are yielded unchanged.
/// </summary>
/// <param name="paths">Paths to remove the project directory from.</param>
/// <param name="projectDirectory">The project directory prefix, with or without a trailing separator.</param>
internal static IEnumerable<string> RemoveProjectDirectory(
    IEnumerable<string> paths,
    string projectDirectory)
{
    bool prefixEndsWithSeparator = IsDirectorySeparator(projectDirectory[projectDirectory.Length - 1]);
    int prefixLength = projectDirectory.Length;

    foreach (string candidate in paths)
    {
        if (!candidate.StartsWith(projectDirectory, StringComparison.Ordinal))
        {
            yield return candidate;
            continue;
        }

        if (prefixEndsWithSeparator)
        {
            yield return candidate.Substring(prefixLength);
            continue;
        }

        // The project directory had no trailing separator, so the prefix only counts as a directory
        // when the character right after it is a separator; that separator is dropped as well.
        bool followedBySeparator = candidate.Length > prefixLength && IsDirectorySeparator(candidate[prefixLength]);
        yield return followedBySeparator ? candidate.Substring(prefixLength + 1) : candidate;
    }
}
/// <summary>
/// Result of evaluating one step of the recursive file search for a single search state
/// (produced by GetFilesRecursiveStep, which is defined outside this part of the file).
/// </summary>
struct RecursiveStepResult
{
// NOTE(review): presumably the wildcard directory portion still to be matched below this step - confirm against GetFilesRecursiveStep.
public string RemainingWildcardDirectory;
// GetFilesRecursive only tests files against an exclude search when this is true for that search.
public bool ConsiderFiles;
// NOTE(review): presumably indicates that each file must be checked individually - confirm against GetFilesForStep.
public bool NeedsToProcessEachFile;
// NOTE(review): presumably the directory name pattern to apply at this step - confirm against GetFilesRecursiveStep.
public string DirectoryPattern;
// When false, GetFilesRecursive returns without descending into subdirectories.
public bool NeedsDirectoryRecursion;
}
/// <summary>
/// Immutable description of a file search: the parts that stay the same while the search walks
/// down the directory tree.
/// </summary>
class FilesSearchData
{
    /// <summary>
    /// The filespec to match file names against. Can be null.
    /// </summary>
    public string Filespec { get; }

    /// <summary>
    /// The directory pattern for globs shaped like **/{pattern}/**, i.e. when a matching directory name is
    /// wanted regardless of where on the path it occurs. Only used when the wildcard directory part has that
    /// shape; for other shapes such as **/{pattern1}/**/{pattern2}/** this optimization is not used and
    /// <see cref="RegexFileMatch"/> decides whether a file path matches the glob. Can be null.
    /// </summary>
    public string DirectoryPattern { get; }

    /// <summary>
    /// Regular expression used for wild-card matching. Can be null.
    /// </summary>
    public Regex RegexFileMatch { get; }

    /// <summary>
    /// True when the search has to recurse.
    /// </summary>
    public bool NeedsRecursion { get; }

    /// <summary>
    /// Captures the search parameters; every value is stored as given.
    /// </summary>
    /// <param name="filespec">The filespec; can be null.</param>
    /// <param name="directoryPattern">The directory pattern; can be null.</param>
    /// <param name="regexFileMatch">The regular expression for wild-card matching; can be null.</param>
    /// <param name="needsRecursion">Whether recursion is required.</param>
    public FilesSearchData(
        string filespec,
        string directoryPattern,
        Regex regexFileMatch,
        bool needsRecursion)
    {
        NeedsRecursion = needsRecursion;
        RegexFileMatch = regexFileMatch;
        DirectoryPattern = directoryPattern;
        Filespec = filespec;
    }
}
/// <summary>
/// The state of one file search at a particular directory while the directory tree is being walked.
/// This is a mutable struct, so assigning it to another variable copies it.
/// </summary>
struct RecursionState
{
/// <summary>
/// The directory to search in
/// </summary>
public string BaseDirectory;
/// <summary>
/// The remaining, wildcard part of the directory.
/// </summary>
public string RemainingWildcardDirectory;
/// <summary>
/// True if SearchData.DirectoryPattern is non-null and we have descended into a directory that matches the pattern.
/// </summary>
public bool IsInsideMatchingDirectory;
/// <summary>
/// Data about a search that does not change as the search recursively traverses directories
/// </summary>
public FilesSearchData SearchData;
/// <summary>
/// True if a SearchData.DirectoryPattern is specified but we have not descended into a matching directory.
/// </summary>
public bool IsLookingForMatchingDirectory => (SearchData.DirectoryPattern != null && !IsInsideMatchingDirectory);
}
/// <summary>
/// Get all files that match either the file-spec or the regular expression.
/// </summary>
/// <param name="listOfFiles">List of files that gets populated.</param>
/// <param name="recursionState">Information about the search</param>
/// <param name="projectDirectory"></param>
/// <param name="stripProjectDirectory"></param>
/// <param name="searchesToExclude">Patterns to exclude from the results</param>
/// <param name="searchesToExcludeInSubdirs">exclude patterns that might activate farther down the directory tree. Keys assume paths are normalized with forward slashes and no trailing slashes</param>
/// <param name="taskOptions">Options for tuning the parallelization of subdirectories</param>
private void GetFilesRecursive(
ConcurrentStack<List<string>> listOfFiles,
RecursionState recursionState,
string projectDirectory,
bool stripProjectDirectory,
IList<RecursionState> searchesToExclude,
Dictionary<string, List<RecursionState>> searchesToExcludeInSubdirs,
TaskOptions taskOptions)
{
ErrorUtilities.VerifyThrow((recursionState.SearchData.Filespec == null) || (recursionState.SearchData.RegexFileMatch == null),
"File-spec overrides the regular expression -- pass null for file-spec if you want to use the regular expression.");
ErrorUtilities.VerifyThrow((recursionState.SearchData.Filespec != null) || (recursionState.SearchData.RegexFileMatch != null),
"Need either a file-spec or a regular expression to match files.");
ErrorUtilities.VerifyThrow(recursionState.RemainingWildcardDirectory != null, "Expected non-null remaning wildcard directory.");
RecursiveStepResult[] excludeNextSteps = null;
// Determine if any of searchesToExclude is necessarily a superset of the results that will be returned.
// This means all results will be excluded and we should bail out now.
if (searchesToExclude != null)
{
excludeNextSteps = new RecursiveStepResult[searchesToExclude.Count];
for (int i = 0; i < searchesToExclude.Count; i++)
{
RecursionState searchToExclude = searchesToExclude[i];
// The BaseDirectory of all the exclude searches should be the same as the include one
Debug.Assert(FileUtilities.PathsEqual(searchToExclude.BaseDirectory, recursionState.BaseDirectory), "Expected exclude search base directory to match include search base directory");
excludeNextSteps[i] = GetFilesRecursiveStep(searchesToExclude[i]);
// We can exclude all results in this folder if:
if (
// We are not looking for a directory matching the pattern given in SearchData.DirectoryPattern
!searchToExclude.IsLookingForMatchingDirectory &&
// We are matching files based on a filespec and not a regular expression
searchToExclude.SearchData.Filespec != null &&
// The wildcard path portion of the excluded search matches the include search
searchToExclude.RemainingWildcardDirectory == recursionState.RemainingWildcardDirectory &&
// The exclude search will match ALL filenames OR
(IsAllFilesWildcard(searchToExclude.SearchData.Filespec) ||
// The exclude search filename pattern matches the include search's pattern
searchToExclude.SearchData.Filespec == recursionState.SearchData.Filespec))
{
// We won't get any results from this search that we would end up keeping
return;
}
}
}
RecursiveStepResult nextStep = GetFilesRecursiveStep(recursionState);
List<string> files = null;
foreach (string file in GetFilesForStep(nextStep, recursionState, projectDirectory,
stripProjectDirectory))
{
if (excludeNextSteps != null)
{
bool exclude = false;
for (int i = 0; i < excludeNextSteps.Length; i++)
{
RecursiveStepResult excludeNextStep = excludeNextSteps[i];
if (excludeNextStep.ConsiderFiles && MatchFileRecursionStep(searchesToExclude[i], file))
{
exclude = true;
break;
}
}
if (exclude)
{
continue;
}
}
files ??= new List<string>();
files.Add(file);
}
// Add all matched files at once to reduce thread contention
if (files?.Count > 0)
{
listOfFiles.Push(files);
}
if (!nextStep.NeedsDirectoryRecursion)
{
return;
}
Action<string> processSubdirectory = subdir =>
{
// RecursionState is a struct so this copies it
var newRecursionState = recursionState;
newRecursionState.BaseDirectory = subdir;
newRecursionState.RemainingWildcardDirectory = nextStep.RemainingWildcardDirectory;
if (newRecursionState.IsLookingForMatchingDirectory &&
DirectoryEndsWithPattern(subdir, recursionState.SearchData.DirectoryPattern))
{
newRecursionState.IsInsideMatchingDirectory = true;
}
List<RecursionState> newSearchesToExclude = null;
if (excludeNextSteps != null)
{
newSearchesToExclude = new List<RecursionState>();
for (int i = 0; i < excludeNextSteps.Length; i++)
{
if (excludeNextSteps[i].NeedsDirectoryRecursion &&
(excludeNextSteps[i].DirectoryPattern == null || IsFileNameMatch(subdir, excludeNextSteps[i].DirectoryPattern)))
{
RecursionState thisExcludeStep = searchesToExclude[i];
thisExcludeStep.BaseDirectory = subdir;
thisExcludeStep.RemainingWildcardDirectory = excludeNextSteps[i].RemainingWildcardDirectory;
if (thisExcludeStep.IsLookingForMatchingDirectory &&
DirectoryEndsWithPattern(subdir, thisExcludeStep.SearchData.DirectoryPattern))
{
thisExcludeStep.IsInsideMatchingDirectory = true;
}
newSearchesToExclude.Add(thisExcludeStep);
}
}
}
if (searchesToExcludeInSubdirs != null)
{
List<RecursionState> searchesForSubdir;
if (searchesToExcludeInSubdirs.TryGetValue(subdir, out searchesForSubdir))
{
// We've found the base directory that these exclusions apply to. So now add them as normal searches
if (newSearchesToExclude == null)
{
newSearchesToExclude = new List<RecursionState>();
}
newSearchesToExclude.AddRange(searchesForSubdir);
}
}
// We never want to strip the project directory from the leaves, because the current
// process directory may be different
GetFilesRecursive(
listOfFiles,
newRecursionState,
projectDirectory,
stripProjectDirectory,
newSearchesToExclude,
searchesToExcludeInSubdirs,
taskOptions);
};
// Calculate the MaxDegreeOfParallelism value in order to prevent too many tasks from running concurrently.
int dop = 0;
// Lock only when we may be dealing with multiple threads
if (taskOptions.MaxTasks > 1 && taskOptions.MaxTasksPerIteration > 1)
{
// We don't need to lock when only one Parallel.ForEach will be running.
// If this condition is true, it means that we are going to iterate through the project root folder
// by using only one Parallel.ForEach.
if (taskOptions.MaxTasks == taskOptions.MaxTasksPerIteration)
{
dop = taskOptions.AvailableTasks;
taskOptions.AvailableTasks = 0;
}
else
{
lock (taskOptions)
{
dop = Math.Min(taskOptions.MaxTasksPerIteration, taskOptions.AvailableTasks);
taskOptions.AvailableTasks -= dop;