From 308b0076e8e743ba209d06503356d63d54984ffa Mon Sep 17 00:00:00 2001 From: Tom Deseyn Date: Thu, 8 Jun 2023 09:00:41 +0200 Subject: [PATCH 1/6] Replace rdfind invocation with Task that replaces duplicate files with links. --- .../Utilities.cs | 12 +- src/core-sdk-tasks/RemoveDuplicateFiles.cs | 161 ++++++++++++++++++ src/core-sdk-tasks/core-sdk-tasks.csproj | 1 + src/redist/targets/BuildCoreSdkTasks.targets | 1 + src/redist/targets/GenerateLayout.targets | 10 +- 5 files changed, 173 insertions(+), 12 deletions(-) create mode 100644 src/core-sdk-tasks/RemoveDuplicateFiles.cs diff --git a/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/Utilities.cs b/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/Utilities.cs index 86fc70b4b..6bdee2402 100644 --- a/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/Utilities.cs +++ b/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/Utilities.cs @@ -16,11 +16,15 @@ namespace Microsoft.DotNet.SourceBuild.SmokeTests; public static class Utilities { - public static void ExtractTarball(string tarballPath, string outputDir) + public static void ExtractTarball(string tarballPath, string outputDir, ITestOutputHelper outputHelper) { - using FileStream fileStream = File.OpenRead(tarballPath); - using GZipStream decompressorStream = new(fileStream, CompressionMode.Decompress); - TarFile.ExtractToDirectory(decompressorStream, outputDir, true); + // TarFile doesn't properly handle hard links (https://github.com/dotnet/runtime/pull/85378#discussion_r1221817490), + // use 'tar' instead. + ExecuteHelper.ExecuteProcessValidateExitCode("tar", $"xzf {tarballPath} -C {outputDir}", outputHelper); + + // using FileStream fileStream = File.OpenRead(tarballPath); + // using GZipStream decompressorStream = new(fileStream, CompressionMode.Decompress); + // TarFile.ExtractToDirectory(decompressorStream, outputDir, true); } public static void ExtractTarball(string tarballPath, string outputDir, string targetFilePath) diff --git a/src/core-sdk-tasks/RemoveDuplicateFiles.cs b/src/core-sdk-tasks/RemoveDuplicateFiles.cs new file mode 100644 index 000000000..aa280d975 --- /dev/null +++ b/src/core-sdk-tasks/RemoveDuplicateFiles.cs @@ -0,0 +1,161 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#nullable enable + +using System; +using System.Collections.Generic; +using System.ComponentModel; +using System.IO; +using System.IO.Enumeration; +using System.IO.MemoryMappedFiles; +using System.Linq; +using System.Runtime.InteropServices; +using Microsoft.Build.Framework; +using Microsoft.Build.Utilities; + +namespace Microsoft.DotNet.Build.Tasks +{ + /// + /// Replaces files that have the same content with hard links. + /// + public sealed class RemoveDuplicateFiles : Task + { + /// + /// The path to the directory. + /// + [Required] + public string Directory { get; set; } = ""; + + public override bool Execute() + { + if (OperatingSystem.IsWindows()) + { + Log.LogError($"{nameof(RemoveDuplicateFiles)} is not supported on Windows."); + return false; + } + + if (!System.IO.Directory.Exists(Directory)) + { + Log.LogError($"'{Directory}' does not exist."); + return false; + } + + // Find all non-empty, non-symbolic link files. + IEnumerable fse = new FileSystemEnumerable( + Directory, + (ref FileSystemEntry entry) => (FileInfo)entry.ToFileSystemInfo(), + new EnumerationOptions() + { + AttributesToSkip = FileAttributes.ReparsePoint, + RecurseSubdirectories = true + }) + { + ShouldIncludePredicate = (ref FileSystemEntry entry) => !entry.IsDirectory + && entry.Length > 0 + }; + + // Group them by file size. + IEnumerable filesGroupedBySize = fse.GroupBy(file => file.Length, + file => file.FullName, + (size, files) => files.ToArray()); + + // Replace files with same content with hard link. + foreach (var files in filesGroupedBySize) + { + for (int i = 0; i < files.Length; i++) + { + string? path1 = files[i]; + if (path1 is null) + { + continue; // already linked. + } + for (int j = i + 1; j < files.Length; j++) + { + string? path2 = files[j]; + if (path2 is null) + { + continue; // already linked. + } + + // note: There's no public API we can use to see if paths are already linked. + // We treat those paths as unlinked files, and link them again. + if (FilesHaveSameContent(path1, path2)) + { + ReplaceByLink(path1, path2); + + files[j] = null; + } + } + } + } + + return true; + } + + private unsafe bool FilesHaveSameContent(string path1, string path2) + { + using var mappedFile1 = MemoryMappedFile.CreateFromFile(path1, FileMode.Open); + using var accessor1 = mappedFile1.CreateViewAccessor(); + byte* ptr1 = null; + + using var mappedFile2 = MemoryMappedFile.CreateFromFile(path2, FileMode.Open); + using var accessor2 = mappedFile2.CreateViewAccessor(); + byte* ptr2 = null; + + try + { + accessor1.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr1); + Span span1 = new Span(ptr1, checked((int)accessor1.SafeMemoryMappedViewHandle.ByteLength)); + + accessor2.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr2); + Span span2 = new Span(ptr2, checked((int)accessor2.SafeMemoryMappedViewHandle.ByteLength)); + + return span1.SequenceEqual(span2); + } + finally + { + if (ptr1 != null) + { + accessor1.SafeMemoryMappedViewHandle.ReleasePointer(); + ptr1 = null; + } + if (ptr2 != null) + { + accessor2.SafeMemoryMappedViewHandle.ReleasePointer(); + ptr2 = null; + } + } + } + + void ReplaceByLink(string path1, string path2) + { + // To link, the target mustn't exist. Make a backup, so we can restore it when linking fails. + string path2Backup = $"{path2}.pre_link_backup"; + File.Move(path2, path2Backup); + + int rv = SystemNative_Link(path1, path2); + if (rv != 0) + { + var ex = new Win32Exception(); // Captures the LastError. + + Log.LogError($"Unable to link '{path2}' to '{path1}.': {ex}"); + + File.Move(path2Backup, path2); + + throw ex; + } + else + { + File.Delete(path2Backup); + + Log.LogMessage(MessageImportance.Normal, $"Linked '{path1}' and '{path2}'."); + } + } + + // This native method is used by the runtime to create hard links. It is not exposed through a public .NET API. + [DllImport("libSystem.Native", SetLastError = true)] + static extern int SystemNative_Link(string source, string link); + } +} diff --git a/src/core-sdk-tasks/core-sdk-tasks.csproj b/src/core-sdk-tasks/core-sdk-tasks.csproj index b90bc55a9..a105248a7 100644 --- a/src/core-sdk-tasks/core-sdk-tasks.csproj +++ b/src/core-sdk-tasks/core-sdk-tasks.csproj @@ -6,6 +6,7 @@ Microsoft.DotNet.Cli.Build $(DefineConstants);SOURCE_BUILD true + true diff --git a/src/redist/targets/BuildCoreSdkTasks.targets b/src/redist/targets/BuildCoreSdkTasks.targets index e93fd0d06..cd53c8b40 100644 --- a/src/redist/targets/BuildCoreSdkTasks.targets +++ b/src/redist/targets/BuildCoreSdkTasks.targets @@ -40,5 +40,6 @@ + diff --git a/src/redist/targets/GenerateLayout.targets b/src/redist/targets/GenerateLayout.targets index 8d1cf16f8..2a52da819 100644 --- a/src/redist/targets/GenerateLayout.targets +++ b/src/redist/targets/GenerateLayout.targets @@ -567,14 +567,8 @@ - - - - - - - + Condition="'$(BundleRuntimePacks)' == 'true' and !$([MSBuild]::IsOSPlatform('WINDOWS'))"> + Date: Thu, 8 Jun 2023 16:09:53 +0200 Subject: [PATCH 2/6] Update ExtractTarball callers to pass OutputHelper. --- .../Microsoft.DotNet.SourceBuild.SmokeTests/DotNetHelper.cs | 2 +- .../Microsoft.DotNet.SourceBuild.SmokeTests/OmniSharpTests.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/DotNetHelper.cs b/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/DotNetHelper.cs index bce9b5424..60c49d132 100644 --- a/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/DotNetHelper.cs +++ b/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/DotNetHelper.cs @@ -36,7 +36,7 @@ internal class DotNetHelper } Directory.CreateDirectory(Config.DotNetDirectory); - Utilities.ExtractTarball(Config.SdkTarballPath, Config.DotNetDirectory); + Utilities.ExtractTarball(Config.SdkTarballPath, Config.DotNetDirectory, outputHelper); } IsMonoRuntime = DetermineIsMonoRuntime(Config.DotNetDirectory); diff --git a/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/OmniSharpTests.cs b/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/OmniSharpTests.cs index fdcb48ad5..c56f443fb 100644 --- a/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/OmniSharpTests.cs +++ b/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/OmniSharpTests.cs @@ -65,7 +65,7 @@ public class OmniSharpTests : SmokeTests await client.DownloadFileAsync(omniSharpTarballUrl, omniSharpTarballFile, OutputHelper); Directory.CreateDirectory(OmniSharpDirectory); - Utilities.ExtractTarball(omniSharpTarballFile, OmniSharpDirectory); + Utilities.ExtractTarball(omniSharpTarballFile, OmniSharpDirectory, OutputHelper); } } } From 2d8d854867cd69d014754ff3291c7d263b9168f1 Mon Sep 17 00:00:00 2001 From: Tom Deseyn Date: Fri, 9 Jun 2023 09:34:25 +0200 Subject: [PATCH 3/6] PR feedback. --- .../test/Microsoft.DotNet.SourceBuild.SmokeTests/Utilities.cs | 4 ---- ...DuplicateFiles.cs => RemoveDuplicateFilesWithHardLinks.cs} | 4 ++-- src/redist/targets/BuildCoreSdkTasks.targets | 2 +- src/redist/targets/GenerateLayout.targets | 2 +- 4 files changed, 4 insertions(+), 8 deletions(-) rename src/core-sdk-tasks/{RemoveDuplicateFiles.cs => RemoveDuplicateFilesWithHardLinks.cs} (97%) diff --git a/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/Utilities.cs b/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/Utilities.cs index 6bdee2402..e73a29939 100644 --- a/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/Utilities.cs +++ b/src/SourceBuild/content/test/Microsoft.DotNet.SourceBuild.SmokeTests/Utilities.cs @@ -21,10 +21,6 @@ public static class Utilities // TarFile doesn't properly handle hard links (https://github.com/dotnet/runtime/pull/85378#discussion_r1221817490), // use 'tar' instead. ExecuteHelper.ExecuteProcessValidateExitCode("tar", $"xzf {tarballPath} -C {outputDir}", outputHelper); - - // using FileStream fileStream = File.OpenRead(tarballPath); - // using GZipStream decompressorStream = new(fileStream, CompressionMode.Decompress); - // TarFile.ExtractToDirectory(decompressorStream, outputDir, true); } public static void ExtractTarball(string tarballPath, string outputDir, string targetFilePath) diff --git a/src/core-sdk-tasks/RemoveDuplicateFiles.cs b/src/core-sdk-tasks/RemoveDuplicateFilesWithHardLinks.cs similarity index 97% rename from src/core-sdk-tasks/RemoveDuplicateFiles.cs rename to src/core-sdk-tasks/RemoveDuplicateFilesWithHardLinks.cs index aa280d975..076828de5 100644 --- a/src/core-sdk-tasks/RemoveDuplicateFiles.cs +++ b/src/core-sdk-tasks/RemoveDuplicateFilesWithHardLinks.cs @@ -20,7 +20,7 @@ namespace Microsoft.DotNet.Build.Tasks /// /// Replaces files that have the same content with hard links. /// - public sealed class RemoveDuplicateFiles : Task + public sealed class RemoveDuplicateFilesWithHardLinks : Task { /// /// The path to the directory. @@ -32,7 +32,7 @@ namespace Microsoft.DotNet.Build.Tasks { if (OperatingSystem.IsWindows()) { - Log.LogError($"{nameof(RemoveDuplicateFiles)} is not supported on Windows."); + Log.LogError($"{nameof(RemoveDuplicateFilesWithHardLinks)} is not supported on Windows."); return false; } diff --git a/src/redist/targets/BuildCoreSdkTasks.targets b/src/redist/targets/BuildCoreSdkTasks.targets index cd53c8b40..7cf28ecdb 100644 --- a/src/redist/targets/BuildCoreSdkTasks.targets +++ b/src/redist/targets/BuildCoreSdkTasks.targets @@ -40,6 +40,6 @@ - + diff --git a/src/redist/targets/GenerateLayout.targets b/src/redist/targets/GenerateLayout.targets index 2a52da819..e6923ed03 100644 --- a/src/redist/targets/GenerateLayout.targets +++ b/src/redist/targets/GenerateLayout.targets @@ -568,7 +568,7 @@ and the corresponding shared frameworks are included in a distro package their data is shared instead of duplicated. --> - + Date: Fri, 9 Jun 2023 09:55:44 +0200 Subject: [PATCH 4/6] Don't use nullable because it's not supported by C# 7.3 builds. --- src/core-sdk-tasks/RemoveDuplicateFilesWithHardLinks.cs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/core-sdk-tasks/RemoveDuplicateFilesWithHardLinks.cs b/src/core-sdk-tasks/RemoveDuplicateFilesWithHardLinks.cs index 076828de5..4377ce8e7 100644 --- a/src/core-sdk-tasks/RemoveDuplicateFilesWithHardLinks.cs +++ b/src/core-sdk-tasks/RemoveDuplicateFilesWithHardLinks.cs @@ -2,8 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -#nullable enable - using System; using System.Collections.Generic; using System.ComponentModel; @@ -57,7 +55,7 @@ namespace Microsoft.DotNet.Build.Tasks }; // Group them by file size. - IEnumerable filesGroupedBySize = fse.GroupBy(file => file.Length, + IEnumerable filesGroupedBySize = fse.GroupBy(file => file.Length, file => file.FullName, (size, files) => files.ToArray()); @@ -66,14 +64,14 @@ namespace Microsoft.DotNet.Build.Tasks { for (int i = 0; i < files.Length; i++) { - string? path1 = files[i]; + string path1 = files[i]; if (path1 is null) { continue; // already linked. } for (int j = i + 1; j < files.Length; j++) { - string? path2 = files[j]; + string path2 = files[j]; if (path2 is null) { continue; // already linked. From 8e20e9586e9f083ef7a0582eccfea38b6512709a Mon Sep 17 00:00:00 2001 From: Tom Deseyn Date: Fri, 9 Jun 2023 11:42:50 +0200 Subject: [PATCH 5/6] Fix .NET Framework build. --- .../RemoveDuplicateFilesWithHardLinks.cs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/core-sdk-tasks/RemoveDuplicateFilesWithHardLinks.cs b/src/core-sdk-tasks/RemoveDuplicateFilesWithHardLinks.cs index 4377ce8e7..36bec96fa 100644 --- a/src/core-sdk-tasks/RemoveDuplicateFilesWithHardLinks.cs +++ b/src/core-sdk-tasks/RemoveDuplicateFilesWithHardLinks.cs @@ -2,6 +2,9 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +#if !NETFRAMEWORK +#nullable enable + using System; using System.Collections.Generic; using System.ComponentModel; @@ -10,6 +13,7 @@ using System.IO.Enumeration; using System.IO.MemoryMappedFiles; using System.Linq; using System.Runtime.InteropServices; +#endif using Microsoft.Build.Framework; using Microsoft.Build.Utilities; @@ -26,6 +30,13 @@ namespace Microsoft.DotNet.Build.Tasks [Required] public string Directory { get; set; } = ""; +#if NETFRAMEWORK + public override bool Execute() + { + Log.LogError($"{nameof(RemoveDuplicateFilesWithHardLinks)} is not supported on .NET Framework."); + return false; + } +#else public override bool Execute() { if (OperatingSystem.IsWindows()) @@ -55,7 +66,7 @@ namespace Microsoft.DotNet.Build.Tasks }; // Group them by file size. - IEnumerable filesGroupedBySize = fse.GroupBy(file => file.Length, + IEnumerable filesGroupedBySize = fse.GroupBy(file => file.Length, file => file.FullName, (size, files) => files.ToArray()); @@ -64,14 +75,14 @@ namespace Microsoft.DotNet.Build.Tasks { for (int i = 0; i < files.Length; i++) { - string path1 = files[i]; + string? path1 = files[i]; if (path1 is null) { continue; // already linked. } for (int j = i + 1; j < files.Length; j++) { - string path2 = files[j]; + string? path2 = files[j]; if (path2 is null) { continue; // already linked. @@ -155,5 +166,6 @@ namespace Microsoft.DotNet.Build.Tasks // This native method is used by the runtime to create hard links. It is not exposed through a public .NET API. [DllImport("libSystem.Native", SetLastError = true)] static extern int SystemNative_Link(string source, string link); +#endif } } From 69f653a2269506ccba8e6efd1978f088b1c33840 Mon Sep 17 00:00:00 2001 From: Tom Deseyn Date: Fri, 9 Jun 2023 16:21:45 +0200 Subject: [PATCH 6/6] Rename Task. --- ...thHardLinks.cs => ReplaceDuplicateFilesWithHardLinks.cs} | 6 +++--- src/redist/targets/BuildCoreSdkTasks.targets | 2 +- src/redist/targets/GenerateLayout.targets | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) rename src/core-sdk-tasks/{RemoveDuplicateFilesWithHardLinks.cs => ReplaceDuplicateFilesWithHardLinks.cs} (95%) diff --git a/src/core-sdk-tasks/RemoveDuplicateFilesWithHardLinks.cs b/src/core-sdk-tasks/ReplaceDuplicateFilesWithHardLinks.cs similarity index 95% rename from src/core-sdk-tasks/RemoveDuplicateFilesWithHardLinks.cs rename to src/core-sdk-tasks/ReplaceDuplicateFilesWithHardLinks.cs index 36bec96fa..7fe6559fc 100644 --- a/src/core-sdk-tasks/RemoveDuplicateFilesWithHardLinks.cs +++ b/src/core-sdk-tasks/ReplaceDuplicateFilesWithHardLinks.cs @@ -22,7 +22,7 @@ namespace Microsoft.DotNet.Build.Tasks /// /// Replaces files that have the same content with hard links. /// - public sealed class RemoveDuplicateFilesWithHardLinks : Task + public sealed class ReplaceDuplicateFilesWithHardLinks : Task { /// /// The path to the directory. @@ -33,7 +33,7 @@ namespace Microsoft.DotNet.Build.Tasks #if NETFRAMEWORK public override bool Execute() { - Log.LogError($"{nameof(RemoveDuplicateFilesWithHardLinks)} is not supported on .NET Framework."); + Log.LogError($"{nameof(ReplaceDuplicateFilesWithHardLinks)} is not supported on .NET Framework."); return false; } #else @@ -41,7 +41,7 @@ namespace Microsoft.DotNet.Build.Tasks { if (OperatingSystem.IsWindows()) { - Log.LogError($"{nameof(RemoveDuplicateFilesWithHardLinks)} is not supported on Windows."); + Log.LogError($"{nameof(ReplaceDuplicateFilesWithHardLinks)} is not supported on Windows."); return false; } diff --git a/src/redist/targets/BuildCoreSdkTasks.targets b/src/redist/targets/BuildCoreSdkTasks.targets index 7cf28ecdb..afd5d21bd 100644 --- a/src/redist/targets/BuildCoreSdkTasks.targets +++ b/src/redist/targets/BuildCoreSdkTasks.targets @@ -40,6 +40,6 @@ - + diff --git a/src/redist/targets/GenerateLayout.targets b/src/redist/targets/GenerateLayout.targets index e6923ed03..f12dd495f 100644 --- a/src/redist/targets/GenerateLayout.targets +++ b/src/redist/targets/GenerateLayout.targets @@ -568,7 +568,7 @@ and the corresponding shared frameworks are included in a distro package their data is shared instead of duplicated. --> - +