From b76440930befb1fd847ef909111dc4b79a076564 Mon Sep 17 00:00:00 2001
From: Ella Hathaway <67609881+ellahathaway@users.noreply.github.com>
Date: Tue, 12 Mar 2024 10:30:23 -0600
Subject: [PATCH] Binary tooling (#18726)
---
eng/pipelines/templates/jobs/vmr-build.yml | 1 +
.../content/eng/prep-source-build.sh | 145 +++++++++++++++++-
.../eng/tools/BinaryToolKit/BinaryTool.cs | 100 ++++++++++++
.../tools/BinaryToolKit/BinaryToolKit.csproj | 27 ++++
.../CompareBinariesAgainstBaselines.cs | 134 ++++++++++++++++
.../eng/tools/BinaryToolKit/DetectBinaries.cs | 143 +++++++++++++++++
.../content/eng/tools/BinaryToolKit/Log.cs | 47 ++++++
.../content/eng/tools/BinaryToolKit/Modes.cs | 12 ++
.../eng/tools/BinaryToolKit/NuGet.config | 8 +
.../eng/tools/BinaryToolKit/Program.cs | 76 +++++++++
.../eng/tools/BinaryToolKit/RemoveBinaries.cs | 22 +++
src/VirtualMonoRepo/allowed-binaries.txt | 79 +++++-----
12 files changed, 753 insertions(+), 41 deletions(-)
create mode 100644 src/SourceBuild/content/eng/tools/BinaryToolKit/BinaryTool.cs
create mode 100644 src/SourceBuild/content/eng/tools/BinaryToolKit/BinaryToolKit.csproj
create mode 100644 src/SourceBuild/content/eng/tools/BinaryToolKit/CompareBinariesAgainstBaselines.cs
create mode 100644 src/SourceBuild/content/eng/tools/BinaryToolKit/DetectBinaries.cs
create mode 100644 src/SourceBuild/content/eng/tools/BinaryToolKit/Log.cs
create mode 100644 src/SourceBuild/content/eng/tools/BinaryToolKit/Modes.cs
create mode 100644 src/SourceBuild/content/eng/tools/BinaryToolKit/NuGet.config
create mode 100644 src/SourceBuild/content/eng/tools/BinaryToolKit/Program.cs
create mode 100644 src/SourceBuild/content/eng/tools/BinaryToolKit/RemoveBinaries.cs
diff --git a/eng/pipelines/templates/jobs/vmr-build.yml b/eng/pipelines/templates/jobs/vmr-build.yml
index f1a2cb96d..313f31133 100644
--- a/eng/pipelines/templates/jobs/vmr-build.yml
+++ b/eng/pipelines/templates/jobs/vmr-build.yml
@@ -386,6 +386,7 @@ jobs:
find artifacts/ -type f -name "*.diff" -exec rsync -R {} -t ${targetFolder} \;
if [[ "${{ parameters.buildSourceOnly }}" == "True" ]]; then
find artifacts/prebuilt-report/ -exec rsync -R {} -t ${targetFolder} \;
+ find artifacts/log/binary-report/ -exec rsync -R {} -t ${targetFolder} \;
fi
find src/ -type f -name "*.binlog" -exec rsync -R {} -t ${targetFolder} \;
find src/ -type f -name "*.log" -exec rsync -R {} -t ${targetFolder} \;
diff --git a/src/SourceBuild/content/eng/prep-source-build.sh b/src/SourceBuild/content/eng/prep-source-build.sh
index 8c6fecef5..81352d133 100755
--- a/src/SourceBuild/content/eng/prep-source-build.sh
+++ b/src/SourceBuild/content/eng/prep-source-build.sh
@@ -2,8 +2,9 @@
### Usage: $0
###
-### Prepares the environment for a source build by downloading Private.SourceBuilt.Artifacts.*.tar.gz and
-### installing the version of dotnet referenced in global.json
+### Prepares the environment for a source build by downloading Private.SourceBuilt.Artifacts.*.tar.gz,
+### installing the version of dotnet referenced in global.json,
+### and detecting binaries and removing any non-SB allowed binaries.
###
### Options:
### --no-artifacts Exclude the download of the previously source-built artifacts archive
@@ -15,6 +16,21 @@
### --runtime-source-feed URL of a remote server or a local directory, from which SDKs and
### runtimes can be downloaded
### --runtime-source-feed-key Key for accessing the above server, if necessary
+###
+### Binary-Tooling options:
+### --no-binary-tooling Don't run the binary tooling
+### --allowed-binaries Path to the file containing the list of known binaries that are allowed
+### in the VMR and can be kept for source-building.
+### Default is src/installer/src/VirtualMonoRepo/allowed-binaries.txt
+### --disallowed-sb-binaries Path to the file containing the list of known binaries that are allowed
+### in the VMR but cannot be kept for source-building.
+### Default is null.
+### --with-sdk Use the SDK in the specified directory
+### Default is the .NET SDK in the .dotnet directory at the repo root
+### --with-packages URL or specified directory to use as the source feed for packages
+### Default is the previously source-built artifacts archive
+### --no-validate Do not run validation. Only remove the binaries.
+### --no-clean Do not remove the binaries. Only run the validation.
set -euo pipefail
IFS=$'\n\t'
@@ -26,8 +42,16 @@ function print_help () {
sed -n '/^### /,/^$/p' "$source" | cut -b 5-
}
+# SB prep default arguments
defaultArtifactsRid='centos.8-x64'
+# Binary Tooling default arguments
+defaultAllowedBinaries="$REPO_ROOT/src/installer/src/VirtualMonoRepo/allowed-binaries.txt"
+defaultDotnetSdk="$REPO_ROOT/.dotnet"
+defaultPackagesDir="$REPO_ROOT/prereqs/packages"
+defaultMode="All"
+
+# SB prep arguments
buildBootstrap=true
downloadArtifacts=true
downloadPrebuilts=true
@@ -35,6 +59,15 @@ installDotnet=true
artifactsRid=$defaultArtifactsRid
runtime_source_feed='' # IBM requested these to support s390x scenarios
runtime_source_feed_key='' # IBM requested these to support s390x scenarios
+
+# Binary Tooling arguments
+runBinaryTool=true
+allowedBinaries=$defaultAllowedBinaries
+disallowedSbBinaries=''
+dotnetSdk=$defaultDotnetSdk
+packagesSourceFeed=$defaultPackagesDir
+mode=$defaultMode
+
positional_args=()
while :; do
if [ $# -le 0 ]; then
@@ -69,6 +102,47 @@ while :; do
runtime_source_feed_key=$2
shift
;;
+ --no-binary-tooling)
+ runBinaryTool=false
+ ;;
+ --allowed-binaries)
+ allowedBinaries=$2
+ if [ ! -f "$allowedBinaries" ]; then
+ echo "Allowed binaries file '$allowedBinaries' does not exist"
+ exit 1
+ fi
+ shift
+ ;;
+ --disallowed-sb-binaries)
+ disallowedSbBinaries=$2
+ if [ ! -f "$disallowedSbBinaries" ]; then
+ echo "Disallowed source build binaries file '$disallowedSbBinaries' does not exist"
+ exit 1
+ fi
+ shift
+ ;;
+ --with-sdk)
+ dotnetSdk=$2
+ if [ ! -d "$dotnetSdk" ]; then
+ echo "Custom SDK directory '$dotnetSdk' does not exist"
+ exit 1
+ fi
+ if [ ! -x "$dotnetSdk/dotnet" ]; then
+ echo "Custom SDK '$dotnetSdk/dotnet' does not exist or is not executable"
+ exit 1
+ fi
+ shift
+ ;;
+ --with-packages)
+ packagesSourceFeed=$2
+ shift
+ ;;
+ --no-clean)
+ mode="Validate"
+ ;;
+ --no-validate)
+ mode="Clean"
+ ;;
*)
positional_args+=("$1")
;;
@@ -112,6 +186,56 @@ if [ "$installDotnet" == true ] && [ -d "$REPO_ROOT/.dotnet" ]; then
installDotnet=false;
fi
+function ParseBinaryArgs {
+  # The binary tooling is launched through "$dotnetSdk/dotnet", so an SDK is required: either the --with-sdk
+  # flag must be passed or a pre-existing .dotnet SDK directory must exist.
+  if [ "$dotnetSdk" == "$defaultDotnetSdk" ] && [ ! -d "$dotnetSdk" ]; then
+    echo " ERROR: A pre-existing .dotnet SDK directory is needed if --with-sdk is not provided." \
+      "Please either supply an SDK using --with-sdk or execute ./eng/prep-source-build.sh before proceeding. Exiting..." >&2
+    exit 1
+  fi
+
+  # Attempting to run the binary tooling without a packages directory or source-feed will fail. So either the
+  # --with-packages flag must be passed with a valid directory or a pre-existing packages directory must exist.
+  if [ "$packagesSourceFeed" == "$defaultPackagesDir" ] && [ ! -d "$packagesSourceFeed" ]; then
+    echo " ERROR: A pre-existing packages directory is needed if --with-packages is not provided." \
+      "Please either supply a packages directory using --with-packages or" \
+      "execute ./eng/prep-source-build.sh with download artifacts enabled before proceeding. Exiting..." >&2
+    exit 1
+  fi
+
+  # Attempting to run the binary tooling with a custom packages feed that does not
+  # have PackageVersions.props in the packages directory or source-feed will fail.
+  if [ "$packagesSourceFeed" != "$defaultPackagesDir" ] && [ ! -f "$packagesSourceFeed/PackageVersions.props" ]; then
+    echo " ERROR: PackageVersions.props is needed in the packages directory or source-feed. Exiting..." >&2
+    exit 1
+  fi
+
+  # Set up the packages source feed if we're using the default artifacts
+  local previouslyBuiltPackagesDir="$defaultPackagesDir/previously-source-built"
+  local packageArtifacts="$defaultPackagesDir/archive/Private.SourceBuilt.Artifacts.*.tar.gz"
+  if [ "$packagesSourceFeed" == "$defaultPackagesDir" ]; then
+    if [ -d "$previouslyBuiltPackagesDir" ]; then
+      echo " Previously source built packages directory exists..."
+      echo " Using $previouslyBuiltPackagesDir as the source-feed for the binary tooling..."
+      packagesSourceFeed="$previouslyBuiltPackagesDir"
+    elif [ -f ${packageArtifacts} ]; then
+      echo " Unpacking Private.SourceBuilt.Artifacts.*.tar.gz to $previouslyBuiltPackagesDir..."
+      mkdir -p "$previouslyBuiltPackagesDir"
+      # ${packageArtifacts} is left unquoted on purpose so the wildcard expands to the archive path.
+      tar -xzf ${packageArtifacts} -C "$previouslyBuiltPackagesDir"
+
+      echo " Using $previouslyBuiltPackagesDir as the source-feed for the binary tooling..."
+      packagesSourceFeed="$previouslyBuiltPackagesDir"
+    else
+      echo " ERROR: A pre-existing package archive is needed if --with-packages is not provided." \
+        "Please either supply a source-feed using --with-packages or execute ./eng/prep-source-build.sh" \
+        "with download artifacts enabled before proceeding. Exiting..." >&2
+      exit 1
+    fi
+  fi
+}
+
function DownloadArchive {
archiveType="$1"
isRequired="$2"
@@ -171,6 +295,18 @@ function BootstrapArtifacts {
rm -rf "$workingDir"
}
+function RunBinaryTool {
+  local BinaryTool="$REPO_ROOT/eng/tools/BinaryToolKit"
+  local TargetDir="$REPO_ROOT"
+  local OutputDir="$REPO_ROOT/artifacts/log/binary-report"
+
+  # Set the environment variable for the packages source feed
+  export ARTIFACTS_PATH="$packagesSourceFeed"
+
+  # Run the BinaryDetection tool; everything after "--" is passed to the tool itself rather than to "dotnet run"
+  "$dotnetSdk/dotnet" run --project "$BinaryTool" -c Release -p PackagesPropsDirectory="$packagesSourceFeed" -- "$TargetDir" "$OutputDir" -ab "$allowedBinaries" -db "$disallowedSbBinaries" -m "$mode" -l Debug
+}
+
# Check for the version of dotnet to install
if [ "$installDotnet" == true ]; then
echo " Installing dotnet..."
@@ -189,3 +325,8 @@ fi
if [ "$downloadPrebuilts" == true ]; then
DownloadArchive Prebuilts false $artifactsRid
fi
+
+if [ "$runBinaryTool" == true ]; then
+ ParseBinaryArgs
+ RunBinaryTool
+fi
\ No newline at end of file
diff --git a/src/SourceBuild/content/eng/tools/BinaryToolKit/BinaryTool.cs b/src/SourceBuild/content/eng/tools/BinaryToolKit/BinaryTool.cs
new file mode 100644
index 000000000..9108df223
--- /dev/null
+++ b/src/SourceBuild/content/eng/tools/BinaryToolKit/BinaryTool.cs
@@ -0,0 +1,100 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace BinaryToolKit;
+
+public class BinaryTool
+{
+ public async Task ExecuteAsync(
+ string targetDirectory,
+ string outputReportDirectory,
+ string? allowedBinariesFile,
+ string? disallowedSbBinariesFile,
+ Modes mode)
+ {
+ DateTime startTime = DateTime.Now;
+
+ Log.LogInformation($"Starting binary tool at {startTime} in {mode} mode");
+
+ // Resolve every path argument to a full path and validate it (validation failures exit the process)
+ targetDirectory = GetAndValidateFullPath(
+ "TargetDirectory",
+ targetDirectory,
+ isDirectory: true,
+ createIfNotExist: false,
+ isRequired: true)!;
+ outputReportDirectory = GetAndValidateFullPath(
+ "OutputReportDirectory",
+ outputReportDirectory,
+ isDirectory: true,
+ createIfNotExist: true, // the report directory is created when it is missing
+ isRequired: true)!;
+ allowedBinariesFile = GetAndValidateFullPath(
+ "AllowedBinariesFile",
+ allowedBinariesFile,
+ isDirectory: false,
+ createIfNotExist: false,
+ isRequired: false); // optional: resolves to null when not supplied
+ disallowedSbBinariesFile = GetAndValidateFullPath(
+ "DisallowedSbBinariesFile",
+ disallowedSbBinariesFile,
+ isDirectory: false,
+ createIfNotExist: false,
+ isRequired: false); // optional: resolves to null when not supplied
+
+ // Detect binaries, compare them against the baseline file(s), then optionally delete the unmatched ones
+ var detectedBinaries = await DetectBinaries.ExecuteAsync(targetDirectory);
+
+ var comparedBinaries = CompareBinariesAgainstBaselines
+ .Execute(
+ detectedBinaries,
+ allowedBinariesFile,
+ disallowedSbBinariesFile,
+ outputReportDirectory,
+ targetDirectory,
+ mode);
+
+ if (mode.HasFlag(Modes.Clean)) // true for both Clean and All
+ {
+ RemoveBinaries.Execute(comparedBinaries, targetDirectory);
+ }
+
+ Log.LogInformation("Finished all binary tasks. Took " + (DateTime.Now - startTime).TotalSeconds + " seconds.");
+ }
+
+    private string? GetAndValidateFullPath( // returns the full path, or null when an optional path is not supplied
+        string parameterName,
+        string? path,
+        bool isDirectory,
+        bool createIfNotExist,
+        bool isRequired)
+    {
+        if (string.IsNullOrWhiteSpace(path))
+        {
+            if (isRequired)
+            {
+                Log.LogError($"Required path for '{parameterName}' is empty or consists only of whitespace.");
+                Environment.Exit(1);
+            }
+            return null;
+        }
+
+        string fullPath = Path.GetFullPath(path);
+        bool exists = isDirectory ? Directory.Exists(fullPath) : File.Exists(fullPath);
+
+        if (!exists)
+        {
+            if (createIfNotExist && isDirectory) // only directories are ever created on demand
+            {
+                Log.LogInformation($"Creating directory '{fullPath}' for '{parameterName}'.");
+                Directory.CreateDirectory(fullPath);
+            }
+            else
+            {
+                Log.LogError($"{(isDirectory ? "Directory" : "File")} '{fullPath}' for '{parameterName}' does not exist.");
+                Environment.Exit(1);
+            }
+        }
+        return fullPath;
+    }
+}
\ No newline at end of file
diff --git a/src/SourceBuild/content/eng/tools/BinaryToolKit/BinaryToolKit.csproj b/src/SourceBuild/content/eng/tools/BinaryToolKit/BinaryToolKit.csproj
new file mode 100644
index 000000000..7fd4c2186
--- /dev/null
+++ b/src/SourceBuild/content/eng/tools/BinaryToolKit/BinaryToolKit.csproj
@@ -0,0 +1,27 @@
+
+
+
+ $(NetCurrent)
+ enable
+ enable
+
+ Exe
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/SourceBuild/content/eng/tools/BinaryToolKit/CompareBinariesAgainstBaselines.cs b/src/SourceBuild/content/eng/tools/BinaryToolKit/CompareBinariesAgainstBaselines.cs
new file mode 100644
index 000000000..22762c9e3
--- /dev/null
+++ b/src/SourceBuild/content/eng/tools/BinaryToolKit/CompareBinariesAgainstBaselines.cs
@@ -0,0 +1,134 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Microsoft.Extensions.FileSystemGlobbing;
+
+namespace BinaryToolKit;
+
+public static class CompareBinariesAgainstBaselines
+{
+    public static List<string> Execute( // returns the detected binaries that match no allowed-binaries pattern
+        IEnumerable<string> detectedBinaries,
+        string? allowedBinariesFile,
+        string? disallowedSbBinariesFile,
+        string outputReportDirectory,
+        string targetDirectory,
+        Modes mode)
+    {
+        Log.LogInformation("Comparing detected binaries to baseline(s).");
+
+        var binariesToRemove = GetUnmatchedBinaries(
+            detectedBinaries,
+            allowedBinariesFile,
+            outputReportDirectory,
+            targetDirectory,
+            mode).ToList();
+
+        if (mode.HasFlag(Modes.Validate))
+        {
+            var nonSbBinariesToRemove = GetUnmatchedBinaries(
+                detectedBinaries,
+                disallowedSbBinariesFile,
+                outputReportDirectory,
+                targetDirectory,
+                mode).ToList();
+
+            // A binary matched by neither baseline is new; materialize once because the result is used three times
+            var newBinaries = binariesToRemove.Intersect(nonSbBinariesToRemove).ToList();
+
+            if (newBinaries.Any())
+            {
+                string newBinariesFile = Path.Combine(outputReportDirectory, "NewBinaries.txt");
+                File.WriteAllLines(newBinariesFile, newBinaries);
+
+                Log.LogWarning($" {newBinaries.Count} new binaries detected. Check '{newBinariesFile}' for details.");
+            }
+        }
+
+        Log.LogInformation("Finished comparing binaries.");
+
+        return binariesToRemove;
+    }
+
+    private static IEnumerable<string> GetUnmatchedBinaries( // files in searchFiles matched by no baseline pattern
+        IEnumerable<string> searchFiles,
+        string? baselineFile,
+        string outputReportDirectory,
+        string targetDirectory,
+        Modes mode)
+    {
+        var patterns = ParseBaselineFile(baselineFile);
+
+        if (mode.HasFlag(Modes.Validate))
+        {
+            // If validating in any mode (Mode == Validate or Mode == All),
+            // we need to detect both unused patterns and unmatched files.
+            // We simultaneously detect unused patterns and unmatched files for efficiency.
+
+            HashSet<string> unusedPatterns = new HashSet<string>(patterns);
+            HashSet<string> unmatchedFiles = new HashSet<string>(searchFiles);
+
+            foreach (string pattern in patterns)
+            {
+                Matcher matcher = new Matcher(StringComparison.Ordinal);
+                matcher.AddInclude(pattern);
+
+                var matches = matcher.Match(targetDirectory, searchFiles);
+                if (matches.HasMatches)
+                {
+                    unusedPatterns.Remove(pattern);
+                    unmatchedFiles.ExceptWith(matches.Files.Select(file => file.Path));
+                }
+            }
+
+            UpdateBaselineFile(baselineFile, outputReportDirectory, unusedPatterns);
+
+            return unmatchedFiles;
+        }
+        else if (mode == Modes.Clean)
+        {
+            // If only cleaning and not validating (Mode == Clean),
+            // we don't need to update the baseline files with unused patterns
+            // so we can just detect unmatched files.
+
+            Matcher matcher = new Matcher(StringComparison.Ordinal);
+            matcher.AddInclude("**/*");
+            matcher.AddExcludePatterns(patterns);
+
+            return matcher.Match(targetDirectory, searchFiles).Files.Select(file => file.Path);
+        }
+        else
+        {
+            // Unhandled mode
+            throw new ArgumentException($"Unhandled mode: {mode}");
+        }
+    }
+
+    private static IEnumerable<string> ParseBaselineFile(string? file) {
+        if (!File.Exists(file))
+        {
+            return Enumerable.Empty<string>();
+        }
+
+        // Read the baseline file once and parse the patterns, ignoring comments, trailing comments and empty lines
+        return File.ReadLines(file)
+            .Select(line => line.Trim())
+            .Where(line => !string.IsNullOrWhiteSpace(line) && !line.StartsWith('#'))
+            .Select(line => line.Split('#')[0].Trim()).ToList();
+    }
+
+    private static void UpdateBaselineFile(string? file, string outputReportDirectory, HashSet<string> unusedPatterns)
+    {
+        if(File.Exists(file))
+        {
+            var lines = File.ReadAllLines(file);
+            // Compare the pattern part of each line (trailing "# comment" and padding stripped), not the raw line
+            var newLines = lines.Where(line => !unusedPatterns.Contains(line.Split('#')[0].Trim())).ToList();
+            string updatedFile = Path.Combine(outputReportDirectory, "Updated" + Path.GetFileName(file));
+
+            File.WriteAllLines(updatedFile, newLines);
+
+            Log.LogInformation($" Updated baseline file '{file}' written to '{updatedFile}'");
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/SourceBuild/content/eng/tools/BinaryToolKit/DetectBinaries.cs b/src/SourceBuild/content/eng/tools/BinaryToolKit/DetectBinaries.cs
new file mode 100644
index 000000000..25346f7c9
--- /dev/null
+++ b/src/SourceBuild/content/eng/tools/BinaryToolKit/DetectBinaries.cs
@@ -0,0 +1,143 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using Microsoft.Extensions.FileSystemGlobbing;
+using System.Text.RegularExpressions;
+
+namespace BinaryToolKit;
+
+public static class DetectBinaries
+{
+ private const string Utf16Marker = "UTF-16";
+ private const int ChunkSize = 4096;
+ private static readonly Regex GitCleanRegex = new Regex(@"Would (remove|skip)( repository)? (.*)");
+
+    public static async Task<List<string>> ExecuteAsync(string targetDirectory)
+    {
+        Log.LogInformation($"Detecting binaries in '{targetDirectory}'...");
+
+        var matcher = new Matcher(StringComparison.Ordinal);
+        matcher.AddInclude("**/*");
+        matcher.AddExcludePatterns(await GetIgnoredPatternsAsync(targetDirectory));
+
+        IEnumerable<string> matchingFiles = matcher.GetResultsInFullPath(targetDirectory);
+
+        var tasks = matchingFiles
+            .Select(async file =>
+            {
+                // GetRelativePath stays correct even when targetDirectory ends with a directory separator
+                return await IsBinaryAsync(file) ? Path.GetRelativePath(targetDirectory, file) : null;
+            });
+        var binaryFiles = (await Task.WhenAll(tasks)).OfType<string>().ToList();
+
+        Log.LogInformation($"Finished binary detection.");
+
+        return binaryFiles;
+    }
+
+    private static async Task<List<string>> GetIgnoredPatternsAsync(string targetDirectory)
+    {
+        string gitDirectory = Path.Combine(targetDirectory, ".git");
+        bool isGitRepo = Directory.Exists(gitDirectory);
+
+        try
+        {
+            if (!isGitRepo)
+            {
+                // Configure a fake git repo to use so that we can run git clean -ndx
+                await ExecuteProcessAsync("git", $"-C \"{targetDirectory}\" init -q");
+            }
+
+            await ExecuteProcessAsync("git", $"-C \"{targetDirectory}\" config --global safe.directory \"{targetDirectory}\"");
+
+            string output = await ExecuteProcessAsync("git", $"-C \"{targetDirectory}\" clean -ndx");
+
+            List<string> ignoredPaths = output.Split('\n')
+                .Select(line => GitCleanRegex.Match(line.TrimEnd('\r')))
+                .Where(match => match.Success)
+                .Select(match => match.Groups[3].Value)
+                .ToList();
+
+            if (isGitRepo)
+            {
+                ignoredPaths.Add(".git");
+            }
+
+            return ignoredPaths;
+        }
+        finally
+        {
+            // Ensure .git directory is deleted if it wasn't originally a git repo
+            if (!isGitRepo && Directory.Exists(gitDirectory))
+            {
+                Directory.Delete(gitDirectory, true);
+            }
+        }
+    }
+
+    private static async Task<bool> IsBinaryAsync(string filePath)
+    {
+        // Using the GNU diff heuristic to determine if a file is binary or not.
+        // For more details, refer to the GNU diff manual:
+        // https://www.gnu.org/software/diffutils/manual/html_node/Binary.html
+
+        using (FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read))
+        using (BinaryReader br = new BinaryReader(fs))
+        {
+            byte[] buffer = new byte[ChunkSize];
+            int bytesRead = br.Read(buffer, 0, ChunkSize); // only the first chunk of the file is inspected
+            for (int i = 0; i < bytesRead; i++)
+            {
+                if (buffer[i] == 0)
+                {
+                    // Need to check that the file is not UTF-16 encoded
+                    // because heuristic can return false positives
+                    return await IsNotUTF16Async(filePath);
+                }
+            }
+        }
+        return false;
+    }
+
+    private static async Task<bool> IsNotUTF16Async(string file)
+    {
+        if (Environment.OSVersion.Platform == PlatformID.Unix)
+        {
+            // -b (brief) omits the "<name>:" prefix, so a ':' in the path or empty output cannot break the parsing
+            string output = await ExecuteProcessAsync("file", $"-b -- \"{file}\"");
+
+            if (output.Contains(Utf16Marker))
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private static async Task<string> ExecuteProcessAsync(string executable, string arguments)
+    {
+        ProcessStartInfo psi = new ()
+        {
+            FileName = executable,
+            Arguments = arguments,
+            CreateNoWindow = true,
+            RedirectStandardOutput = true,
+            RedirectStandardError = true
+        };
+
+        using var proc = Process.Start(psi)!;
+        // Drain both pipes concurrently: reading one to the end first can deadlock once the child fills the other
+        Task<string> outputTask = proc.StandardOutput.ReadToEndAsync();
+        Task<string> errorTask = proc.StandardError.ReadToEndAsync();
+
+        await proc.WaitForExitAsync();
+        string error = await errorTask;
+        if (!string.IsNullOrEmpty(error))
+        {
+            Log.LogError(error);
+        }
+
+        return await outputTask;
+    }
+}
\ No newline at end of file
diff --git a/src/SourceBuild/content/eng/tools/BinaryToolKit/Log.cs b/src/SourceBuild/content/eng/tools/BinaryToolKit/Log.cs
new file mode 100644
index 000000000..3a018a06d
--- /dev/null
+++ b/src/SourceBuild/content/eng/tools/BinaryToolKit/Log.cs
@@ -0,0 +1,47 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Microsoft.Extensions.Logging;
+
+namespace BinaryToolKit;
+
+public static class Log
+{
+    public static LogLevel Level = LogLevel.Information; // read once, when the logger is first created
+
+    private static readonly Lazy<ILogger> _logger = new Lazy<ILogger>(ConfigureLogger);
+
+    public static void LogDebug(string message)
+    {
+        _logger.Value.LogDebug(message);
+    }
+
+    public static void LogInformation(string message)
+    {
+        _logger.Value.LogInformation(message);
+    }
+
+    public static void LogWarning(string message)
+    {
+        _logger.Value.LogWarning(message);
+    }
+
+    public static void LogError(string message)
+    {
+        _logger.Value.LogError(message);
+    }
+
+    private static ILogger ConfigureLogger()
+    {
+        using ILoggerFactory loggerFactory = // NOTE(review): disposed on return while the logger stays in use - confirm intended
+            LoggerFactory.Create(builder =>
+                builder.AddSimpleConsole(options =>
+                {
+                    options.SingleLine = true;
+                    options.TimestampFormat = "HH:mm:ss ";
+                    options.UseUtcTimestamp = true;
+                })
+                .SetMinimumLevel(Level));
+        return loggerFactory.CreateLogger("BinaryTool");
+    }
+}
\ No newline at end of file
diff --git a/src/SourceBuild/content/eng/tools/BinaryToolKit/Modes.cs b/src/SourceBuild/content/eng/tools/BinaryToolKit/Modes.cs
new file mode 100644
index 000000000..d3f747445
--- /dev/null
+++ b/src/SourceBuild/content/eng/tools/BinaryToolKit/Modes.cs
@@ -0,0 +1,12 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace BinaryToolKit;
+
+[Flags]
+public enum Modes
+{
+    Validate = 1 << 0, // report new binaries and write updated baseline files
+    Clean = 1 << 1, // delete the binaries that no allowed-binaries pattern matches
+    All = Validate | Clean
+}
\ No newline at end of file
diff --git a/src/SourceBuild/content/eng/tools/BinaryToolKit/NuGet.config b/src/SourceBuild/content/eng/tools/BinaryToolKit/NuGet.config
new file mode 100644
index 000000000..b77aa3e00
--- /dev/null
+++ b/src/SourceBuild/content/eng/tools/BinaryToolKit/NuGet.config
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/SourceBuild/content/eng/tools/BinaryToolKit/Program.cs b/src/SourceBuild/content/eng/tools/BinaryToolKit/Program.cs
new file mode 100644
index 000000000..1a1d28746
--- /dev/null
+++ b/src/SourceBuild/content/eng/tools/BinaryToolKit/Program.cs
@@ -0,0 +1,76 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.CommandLine;
+using Microsoft.Extensions.Logging;
+
+namespace BinaryToolKit;
+
+public class Program
+{
+    public static async Task<int> Main(string[] args)
+    {
+        CliArgument<string> TargetDirectory = new("target-directory")
+        {
+            Description = "The directory to run the binary tooling on."
+        };
+
+        CliArgument<string> OutputReportDirectory = new("output-report-directory")
+        {
+            Description = "The directory to output the report to."
+        };
+
+        CliOption<string> AllowedBinariesFile = new("--allowed-binaries", "-ab")
+        {
+            Description = "The file containing the list of known binaries " +
+                          "that are allowed in the VMR and can be kept for source-building."
+        };
+
+        CliOption<string> DisallowedSbBinariesFile = new("--disallowed-sb-binaries", "-db")
+        {
+            Description = "The file containing the list of known binaries " +
+                          "that are allowed in the VMR but cannot be kept for source-building."
+        };
+
+        CliOption<Modes> Mode = new("--mode", "-m")
+        {
+            Description = "The mode to run the tool in.",
+            Arity = ArgumentArity.ZeroOrOne,
+            DefaultValueFactory = _ => Modes.All
+        };
+
+        CliOption<LogLevel> Level = new("--log-level", "-l")
+        {
+            Description = "The log level to run the tool in.",
+            Arity = ArgumentArity.ZeroOrOne,
+            DefaultValueFactory = _ => LogLevel.Information
+        };
+
+        var rootCommand = new CliRootCommand("Tool for detecting, validating, and cleaning binaries in the target directory.")
+        {
+            TargetDirectory,
+            OutputReportDirectory,
+            AllowedBinariesFile,
+            DisallowedSbBinariesFile,
+            Mode,
+            Level
+        };
+
+        rootCommand.SetAction(async (result, cancellationToken) =>
+        {
+            Log.Level = result.GetValue(Level); // must be set before the first message is logged
+
+            var binaryTool = new BinaryTool();
+
+            await binaryTool.ExecuteAsync(
+                result.GetValue(TargetDirectory)!,
+                result.GetValue(OutputReportDirectory)!,
+                result.GetValue(AllowedBinariesFile),
+                result.GetValue(DisallowedSbBinariesFile),
+                result.GetValue(Mode));
+        });
+
+        return await rootCommand.Parse(args).InvokeAsync();
+    }
+}
\ No newline at end of file
diff --git a/src/SourceBuild/content/eng/tools/BinaryToolKit/RemoveBinaries.cs b/src/SourceBuild/content/eng/tools/BinaryToolKit/RemoveBinaries.cs
new file mode 100644
index 000000000..72c34e390
--- /dev/null
+++ b/src/SourceBuild/content/eng/tools/BinaryToolKit/RemoveBinaries.cs
@@ -0,0 +1,22 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Microsoft.Extensions.Logging;
+
+namespace BinaryToolKit;
+
+public static class RemoveBinaries
+{
+    public static void Execute(IEnumerable<string> binariesToRemove, string targetDirectory)
+    {
+        Log.LogInformation($"Removing binaries from '{targetDirectory}'...");
+
+        foreach (var binary in binariesToRemove)
+        {
+            File.Delete(Path.Combine(targetDirectory, binary)); // entries are combined with targetDirectory
+            Log.LogDebug($" Removed '{binary}'");
+        }
+
+        Log.LogInformation($"Finished binary removal. Removed {binariesToRemove.Count()} binaries.");
+    }
+}
\ No newline at end of file
diff --git a/src/VirtualMonoRepo/allowed-binaries.txt b/src/VirtualMonoRepo/allowed-binaries.txt
index 56f66cfb8..635a7617f 100644
--- a/src/VirtualMonoRepo/allowed-binaries.txt
+++ b/src/VirtualMonoRepo/allowed-binaries.txt
@@ -1,64 +1,65 @@
-*.bmp
-*.doc
-*.docx
-*.gif
-*.ico
-*.jpg
-*.JPG
-*.pdf
-*.png
-*.PNG
-*.rtf
-*.snk
-*.vsd
-*.vsdx
-*.xlsx
-*.ttf
-*.cur
-*.icm
-*.reg
+**/*.bmp
+**/*.doc
+**/*.docx
+**/*.gif
+**/*.ico
+**/*.jpg
+**/*.JPG
+**/*.pdf
+**/*.png
+**/*.PNG
+**/*.rtf
+**/*.snk
+**/*.vsd
+**/*.vsdx
+**/*.xlsx
+**/*.ttf
+**/*.cur
+**/*.icm
+**/*.reg
-**/test/*
-**/Test/*
-**/Test/*
+**/test/**/*
+**/Test/**/*
+**/Test/**/*
**/testCert*.pfx
**/TestCert*.pfx
-**/tests/*
+**/tests/**/*
eng/common/loc/*.lss # UTF16-LE text files
**/eng/common/loc/*.lss # UTF16-LE text files
-src/aspnetcore/**/samples/*
-src/aspnetcore/**/TestCertificates/*
-src/aspnetcore/src/*.eot
-src/aspnetcore/src/*.otf
-src/aspnetcore/src/*.woff
-src/aspnetcore/src/*.woff2
+src/aspnetcore/**/samples/**/*
+src/aspnetcore/**/TestCertificates/**/*
+src/aspnetcore/src/**/*.eot
+src/aspnetcore/src/**/*.otf
+src/aspnetcore/src/**/*.woff
+src/aspnetcore/src/**/*.woff2
src/aspnetcore/src/Components/Web.JS/dist/Release/blazor.*.js # JavaScript files with a null bytes
src/aspnetcore/src/ProjectTemplates/Web.ProjectTemplates/**/app.db
src/aspnetcore/src/submodules/spa-templates/**/app.db
+src/aspnetcore/src/submodules/Node-Externals/cache/**/* # https://github.com/dotnet/source-build/issues/4161
-src/fsharp/**/signedtests/*
+src/fsharp/**/signedtests/**/*
src/fsharp/src/fsi/fsi.res # Icon
-src/msbuild/src/Tasks.UnitTests/*
+src/msbuild/src/Tasks.UnitTests/**/*
-src/razor/**/SampleApp/**/fonts/*
+src/razor/**/SampleApp/**/fonts/**/*
-src/roslyn/**/CodeAnalysisTest/*
+src/roslyn/**/CodeAnalysisTest/**/*
src/roslyn/src/ExpressionEvaluator/Core/Source/ExpressionCompiler/Resources/WindowsProxy.winmd # https://github.com/dotnet/roslyn/issues/66718
-src/runtime/src/*.woff
-src/runtime/src/*.woff2
+src/runtime/src/**/*.woff
+src/runtime/src/**/*.woff2
src/runtime/src/libraries/System.Diagnostics.EventLog/src/Messages/EventLogMessages.res # Icon
-src/runtime/src/libraries/System.Speech/src/*.upsmap # https://github.com/dotnet/runtime/issues/81692
+src/runtime/src/libraries/System.Speech/src/**/*.upsmap # https://github.com/dotnet/runtime/issues/81692
src/runtime/src/libraries/System.Text.Encoding.CodePages/src/Data/codepages.nlp # https://github.com/dotnet/runtime/issues/81693
-src/runtime/src/mono/wasm/testassets/*
+src/runtime/src/mono/wasm/testassets/**/*
src/runtime/src/native/external/brotli/common/dictionary.bin.br
-src/sdk/src/Assets/TestProjects/*
+src/sdk/src/Assets/TestProjects/**/*
src/source-build-externals/src/azure-activedirectory-identitymodel-extensions-for-dotnet/build/strongNameBypass.reg # UTF16-LE text file
src/source-build-externals/src/azure-activedirectory-identitymodel-extensions-for-dotnet/build/strongNameBypass2.reg # UTF16-LE text file
-src/source-build-externals/src/humanizer/src/Humanizer.Tests*
+src/source-build-externals/src/humanizer/src/Humanizer.Tests*/**/*
src/source-build-externals/src/newtonsoft-json/Src/Newtonsoft.Json.Tests/SpaceShipV2.bson