From 308b0076e8e743ba209d06503356d63d54984ffa Mon Sep 17 00:00:00 2001
From: Tom Deseyn
Date: Thu, 8 Jun 2023 09:00:41 +0200
Subject: [PATCH] Replace rdfind invocation with Task that replaces duplicate files with links.

---
Review notes (text in this section is ignored by `git am`):
 * NOTE(review): the XML content of the .csproj/.targets hunks at the end of
   this patch appears to have been stripped from this copy (only element text
   remains). Restore those hunks from the original commit before applying.

 .../Utilities.cs                              |  12 +-
 src/core-sdk-tasks/RemoveDuplicateFiles.cs    | 199 ++++++++++++++++++
 src/core-sdk-tasks/core-sdk-tasks.csproj      |   1 +
 src/redist/targets/BuildCoreSdkTasks.targets  |   1 +
 src/redist/targets/GenerateLayout.targets     |  10 +-
 5 files changed, 211 insertions(+), 12 deletions(-)
 create mode 100644 src/core-sdk-tasks/RemoveDuplicateFiles.cs

diff --git a/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/Utilities.cs b/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/Utilities.cs
index 86fc70b4b..6bdee2402 100644
--- a/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/Utilities.cs
+++ b/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/Utilities.cs
@@ -16,11 +16,15 @@ namespace Microsoft.DotNet.SourceBuild.SmokeTests;
 
 public static class Utilities
 {
-    public static void ExtractTarball(string tarballPath, string outputDir)
+    public static void ExtractTarball(string tarballPath, string outputDir, ITestOutputHelper outputHelper)
     {
-        using FileStream fileStream = File.OpenRead(tarballPath);
-        using GZipStream decompressorStream = new(fileStream, CompressionMode.Decompress);
-        TarFile.ExtractToDirectory(decompressorStream, outputDir, true);
+        // TarFile doesn't properly handle hard links (https://github.com/dotnet/runtime/pull/85378#discussion_r1221817490),
+        // use 'tar' instead.
+        ExecuteHelper.ExecuteProcessValidateExitCode("tar", $"xzf \"{tarballPath}\" -C \"{outputDir}\"", outputHelper);
+
+        // using FileStream fileStream = File.OpenRead(tarballPath);
+        // using GZipStream decompressorStream = new(fileStream, CompressionMode.Decompress);
+        // TarFile.ExtractToDirectory(decompressorStream, outputDir, true);
     }
 
     public static void ExtractTarball(string tarballPath, string outputDir, string targetFilePath)
diff --git a/src/core-sdk-tasks/RemoveDuplicateFiles.cs b/src/core-sdk-tasks/RemoveDuplicateFiles.cs
new file mode 100644
index 000000000..aa280d975
--- /dev/null
+++ b/src/core-sdk-tasks/RemoveDuplicateFiles.cs
@@ -0,0 +1,199 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#nullable enable
+
+using System;
+using System.Collections.Generic;
+using System.ComponentModel;
+using System.IO;
+using System.IO.Enumeration;
+using System.IO.MemoryMappedFiles;
+using System.Linq;
+using System.Runtime.InteropServices;
+using Microsoft.Build.Framework;
+using Microsoft.Build.Utilities;
+
+namespace Microsoft.DotNet.Build.Tasks
+{
+    /// <summary>
+    /// Replaces files that have the same content with hard links.
+    /// </summary>
+    public sealed class RemoveDuplicateFiles : Task
+    {
+        /// <summary>
+        /// The path to the directory.
+        /// </summary>
+        [Required]
+        public string Directory { get; set; } = "";
+
+        /// <summary>
+        /// Recursively scans <see cref="Directory"/> and replaces each file whose content equals
+        /// that of an earlier file of the same size with a hard link to that earlier file.
+        /// </summary>
+        /// <returns>
+        /// <c>false</c>, with an error logged, on Windows, when the directory does not exist,
+        /// or when a link cannot be created; <c>true</c> otherwise.
+        /// </returns>
+        public override bool Execute()
+        {
+            if (OperatingSystem.IsWindows())
+            {
+                Log.LogError($"{nameof(RemoveDuplicateFiles)} is not supported on Windows.");
+                return false;
+            }
+
+            if (!System.IO.Directory.Exists(Directory))
+            {
+                Log.LogError($"'{Directory}' does not exist.");
+                return false;
+            }
+
+            // Find all non-empty, non-symbolic link files.
+            IEnumerable<FileInfo> fse = new FileSystemEnumerable<FileInfo>(
+                Directory,
+                (ref FileSystemEntry entry) => (FileInfo)entry.ToFileSystemInfo(),
+                new EnumerationOptions()
+                {
+                    AttributesToSkip = FileAttributes.ReparsePoint,
+                    RecurseSubdirectories = true
+                })
+            {
+                ShouldIncludePredicate = (ref FileSystemEntry entry) => !entry.IsDirectory
+                                                                         && entry.Length > 0
+            };
+
+            // Group them by file size: only files of equal size can have equal content.
+            IEnumerable<string?[]> filesGroupedBySize = fse.GroupBy(file => file.Length,
+                                                                     file => (string?)file.FullName,
+                                                                     (size, files) => files.ToArray());
+
+            // Replace files with same content with hard link.
+            foreach (string?[] files in filesGroupedBySize)
+            {
+                for (int i = 0; i < files.Length; i++)
+                {
+                    string? path1 = files[i];
+                    if (path1 is null)
+                    {
+                        continue; // already linked.
+                    }
+                    for (int j = i + 1; j < files.Length; j++)
+                    {
+                        string? path2 = files[j];
+                        if (path2 is null)
+                        {
+                            continue; // already linked.
+                        }
+
+                        // note: There's no public API we can use to see if paths are already linked.
+                        // We treat those paths as unlinked files, and link them again.
+                        if (FilesHaveSameContent(path1, path2))
+                        {
+                            if (!ReplaceByLink(path1, path2))
+                            {
+                                return false; // the error was logged by ReplaceByLink.
+                            }
+
+                            files[j] = null;
+                        }
+                    }
+                }
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Compares the content of two files byte by byte using read-only memory maps.
+        /// </summary>
+        /// <param name="path1">Path of the first file; must not be empty.</param>
+        /// <param name="path2">Path of the second file; must not be empty.</param>
+        /// <returns><c>true</c> when both files have the same length and the same bytes.</returns>
+        private static unsafe bool FilesHaveSameContent(string path1, string path2)
+        {
+            // Map with read access: the default is ReadWrite, which needlessly requires write permission.
+            using var mappedFile1 = MemoryMappedFile.CreateFromFile(path1, FileMode.Open, mapName: null, capacity: 0, MemoryMappedFileAccess.Read);
+            using var accessor1 = mappedFile1.CreateViewAccessor(offset: 0, size: 0, MemoryMappedFileAccess.Read);
+            byte* ptr1 = null;
+
+            using var mappedFile2 = MemoryMappedFile.CreateFromFile(path2, FileMode.Open, mapName: null, capacity: 0, MemoryMappedFileAccess.Read);
+            using var accessor2 = mappedFile2.CreateViewAccessor(offset: 0, size: 0, MemoryMappedFileAccess.Read);
+            byte* ptr2 = null;
+
+            try
+            {
+                accessor1.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr1);
+                accessor2.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr2);
+
+                ulong length = accessor1.SafeMemoryMappedViewHandle.ByteLength;
+                if (length != accessor2.SafeMemoryMappedViewHandle.ByteLength)
+                {
+                    return false;
+                }
+
+                // A span holds at most int.MaxValue bytes, so compare larger files chunk by chunk.
+                ulong position = 0;
+                while (position < length)
+                {
+                    int chunkLength = (int)Math.Min(length - position, (ulong)int.MaxValue);
+                    var chunk1 = new ReadOnlySpan<byte>(ptr1 + position, chunkLength);
+                    var chunk2 = new ReadOnlySpan<byte>(ptr2 + position, chunkLength);
+                    if (!chunk1.SequenceEqual(chunk2))
+                    {
+                        return false;
+                    }
+
+                    position += (ulong)chunkLength;
+                }
+
+                return true;
+            }
+            finally
+            {
+                if (ptr1 != null)
+                {
+                    accessor1.SafeMemoryMappedViewHandle.ReleasePointer();
+                }
+                if (ptr2 != null)
+                {
+                    accessor2.SafeMemoryMappedViewHandle.ReleasePointer();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Replaces <paramref name="path2"/> with a hard link to <paramref name="path1"/>.
+        /// </summary>
+        /// <returns><c>true</c> when linked; <c>false</c> when linking failed, in which case
+        /// the error is logged and the original file is restored.</returns>
+        private bool ReplaceByLink(string path1, string path2)
+        {
+            // To link, the target mustn't exist. Make a backup, so we can restore it when linking fails.
+            string path2Backup = $"{path2}.pre_link_backup";
+            File.Move(path2, path2Backup);
+
+            if (SystemNative_Link(path1, path2) != 0)
+            {
+                var ex = new Win32Exception(); // Captures the LastError.
+
+                Log.LogError($"Unable to link '{path2}' to '{path1}': {ex.Message}");
+
+                File.Move(path2Backup, path2);
+
+                return false;
+            }
+
+            File.Delete(path2Backup);
+
+            Log.LogMessage(MessageImportance.Normal, $"Linked '{path1}' and '{path2}'.");
+
+            return true;
+        }
+
+        // This native method is used by the runtime to create hard links. It is not exposed through a public .NET API.
+        [DllImport("libSystem.Native", SetLastError = true)]
+        private static extern int SystemNative_Link(string source, string link);
+    }
+}
diff --git a/src/core-sdk-tasks/core-sdk-tasks.csproj b/src/core-sdk-tasks/core-sdk-tasks.csproj
index b90bc55a9..a105248a7 100644
--- a/src/core-sdk-tasks/core-sdk-tasks.csproj
+++ b/src/core-sdk-tasks/core-sdk-tasks.csproj
@@ -6,6 +6,7 @@
     Microsoft.DotNet.Cli.Build
     $(DefineConstants);SOURCE_BUILD
     true
+    true
diff --git a/src/redist/targets/BuildCoreSdkTasks.targets b/src/redist/targets/BuildCoreSdkTasks.targets
index e93fd0d06..cd53c8b40 100644
--- a/src/redist/targets/BuildCoreSdkTasks.targets
+++ b/src/redist/targets/BuildCoreSdkTasks.targets
@@ -40,5 +40,6 @@
+
diff --git a/src/redist/targets/GenerateLayout.targets b/src/redist/targets/GenerateLayout.targets
index 8d1cf16f8..2a52da819 100644
--- a/src/redist/targets/GenerateLayout.targets
+++ b/src/redist/targets/GenerateLayout.targets
@@ -567,14 +567,8 @@
-
-
-
-
-
-
-
+          Condition="'$(BundleRuntimePacks)' == 'true' and !$([MSBuild]::IsOSPlatform('WINDOWS'))">
+