Replace rdfind invocation with Task that replaces duplicate files with links.
This commit is contained in:
parent
f98e016a22
commit
308b0076e8
5 changed files with 173 additions and 12 deletions
|
@ -16,11 +16,15 @@ namespace Microsoft.DotNet.SourceBuild.SmokeTests;
|
|||
|
||||
public static class Utilities
|
||||
{
|
||||
/// <summary>
/// Extracts the gzip-compressed tarball at <paramref name="tarballPath"/> into
/// <paramref name="outputDir"/> by running the external 'tar' tool.
/// </summary>
/// <param name="tarballPath">Path of the .tar.gz archive to extract.</param>
/// <param name="outputDir">Directory passed to 'tar' as the extraction root (-C).</param>
/// <param name="outputHelper">Test output sink handed to the process helper.</param>
public static void ExtractTarball(string tarballPath, string outputDir, ITestOutputHelper outputHelper)
{
    // TarFile doesn't properly handle hard links (https://github.com/dotnet/runtime/pull/85378#discussion_r1221817490),
    // use 'tar' instead.
    // Both paths are quoted so that a path containing whitespace reaches 'tar' as a single argument.
    ExecuteHelper.ExecuteProcessValidateExitCode("tar", $"xzf \"{tarballPath}\" -C \"{outputDir}\"", outputHelper);
}
|
||||
|
||||
public static void ExtractTarball(string tarballPath, string outputDir, string targetFilePath)
|
||||
|
|
161
src/core-sdk-tasks/RemoveDuplicateFiles.cs
Normal file
161
src/core-sdk-tasks/RemoveDuplicateFiles.cs
Normal file
|
@ -0,0 +1,161 @@
|
|||
// Licensed to the .NET Foundation under one or more agreements.
|
||||
// The .NET Foundation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.ComponentModel;
|
||||
using System.IO;
|
||||
using System.IO.Enumeration;
|
||||
using System.IO.MemoryMappedFiles;
|
||||
using System.Linq;
|
||||
using System.Runtime.InteropServices;
|
||||
using Microsoft.Build.Framework;
|
||||
using Microsoft.Build.Utilities;
|
||||
|
||||
namespace Microsoft.DotNet.Build.Tasks
|
||||
{
|
||||
/// <summary>
/// Replaces files that have the same content with hard links.
/// </summary>
/// <remarks>
/// The task is not supported on Windows: it creates the links through the
/// 'SystemNative_Link' export of 'libSystem.Native'.
/// </remarks>
public sealed class RemoveDuplicateFiles : Task
{
    // Suffix of the temporary name a file is moved to while the link that replaces it is created.
    private const string BackupSuffix = ".pre_link_backup";

    /// <summary>
    /// The path to the directory.
    /// </summary>
    [Required]
    public string Directory { get; set; } = "";

    /// <summary>
    /// Scans <see cref="Directory"/> recursively and replaces every file whose content equals
    /// that of an earlier file of the same size with a hard link to that earlier file.
    /// </summary>
    /// <returns>
    /// <c>true</c> when the scan completed; <c>false</c> (with an error logged) when running on
    /// Windows, when the directory does not exist, or when creating a hard link failed.
    /// </returns>
    public override bool Execute()
    {
        if (OperatingSystem.IsWindows())
        {
            Log.LogError($"{nameof(RemoveDuplicateFiles)} is not supported on Windows.");
            return false;
        }

        if (!System.IO.Directory.Exists(Directory))
        {
            Log.LogError($"'{Directory}' does not exist.");
            return false;
        }

        // Group the candidates by file size: only files of equal size can have equal content.
        IEnumerable<string?[]> filesGroupedBySize = EnumerateCandidateFiles().GroupBy(
            file => file.Length,
            file => file.FullName,
            (size, files) => files.ToArray());

        // Replace files with same content with hard link.
        foreach (string?[] files in filesGroupedBySize)
        {
            for (int i = 0; i < files.Length; i++)
            {
                string? path1 = files[i];
                if (path1 is null)
                {
                    continue; // already linked.
                }

                for (int j = i + 1; j < files.Length; j++)
                {
                    string? path2 = files[j];
                    if (path2 is null)
                    {
                        continue; // already linked.
                    }

                    // note: There's no public API we can use to see if paths are already linked.
                    // We treat those paths as unlinked files, and link them again.
                    if (!FilesHaveSameContent(path1, path2))
                    {
                        continue;
                    }

                    if (!ReplaceByLink(path1, path2))
                    {
                        // The failure has been logged and path2 restored; stop at the first failure.
                        return false;
                    }

                    // Mark the slot so path2 is not compared or linked again.
                    files[j] = null;
                }
            }
        }

        return true;
    }

    /// <summary>
    /// Enumerates all non-empty, non-symbolic link files below <see cref="Directory"/>.
    /// </summary>
    private IEnumerable<FileInfo> EnumerateCandidateFiles()
    {
        var options = new EnumerationOptions
        {
            AttributesToSkip = FileAttributes.ReparsePoint,
            RecurseSubdirectories = true
        };

        return new FileSystemEnumerable<FileInfo>(
            Directory,
            (ref FileSystemEntry entry) => (FileInfo)entry.ToFileSystemInfo(),
            options)
        {
            ShouldIncludePredicate = (ref FileSystemEntry entry) => !entry.IsDirectory && entry.Length > 0
        };
    }

    /// <summary>
    /// Compares the content of two files byte by byte through read-only memory mappings.
    /// </summary>
    /// <param name="path1">Path of the first file; must not be empty.</param>
    /// <param name="path2">Path of the second file; must not be empty.</param>
    /// <returns><c>true</c> when both mapped views have the same length and the same bytes.</returns>
    private static unsafe bool FilesHaveSameContent(string path1, string path2)
    {
        // The files are only read, so map them read-only; this avoids requiring write permission.
        using var mappedFile1 = MemoryMappedFile.CreateFromFile(path1, FileMode.Open, null, 0, MemoryMappedFileAccess.Read);
        using var accessor1 = mappedFile1.CreateViewAccessor(0, 0, MemoryMappedFileAccess.Read);
        byte* ptr1 = null;

        using var mappedFile2 = MemoryMappedFile.CreateFromFile(path2, FileMode.Open, null, 0, MemoryMappedFileAccess.Read);
        using var accessor2 = mappedFile2.CreateViewAccessor(0, 0, MemoryMappedFileAccess.Read);
        byte* ptr2 = null;

        try
        {
            accessor1.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr1);
            accessor2.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr2);

            long length1 = checked((long)accessor1.SafeMemoryMappedViewHandle.ByteLength);
            long length2 = checked((long)accessor2.SafeMemoryMappedViewHandle.ByteLength);
            if (length1 != length2)
            {
                return false;
            }

            // A span cannot describe more than int.MaxValue bytes, so compare in chunks.
            long offset = 0;
            while (offset < length1)
            {
                int chunkLength = (int)Math.Min(length1 - offset, int.MaxValue);
                var chunk1 = new ReadOnlySpan<byte>(ptr1 + offset, chunkLength);
                var chunk2 = new ReadOnlySpan<byte>(ptr2 + offset, chunkLength);
                if (!chunk1.SequenceEqual(chunk2))
                {
                    return false;
                }

                offset += chunkLength;
            }

            return true;
        }
        finally
        {
            // Release only the pointers that were actually acquired.
            if (ptr1 != null)
            {
                accessor1.SafeMemoryMappedViewHandle.ReleasePointer();
            }

            if (ptr2 != null)
            {
                accessor2.SafeMemoryMappedViewHandle.ReleasePointer();
            }
        }
    }

    /// <summary>
    /// Replaces <paramref name="path2"/> with a hard link to <paramref name="path1"/>.
    /// </summary>
    /// <param name="path1">The file that is kept and linked to.</param>
    /// <param name="path2">The file that is replaced by the link.</param>
    /// <returns>
    /// <c>true</c> when the link was created; <c>false</c> when linking failed, in which case
    /// the error has been logged and the original <paramref name="path2"/> has been restored.
    /// </returns>
    private bool ReplaceByLink(string path1, string path2)
    {
        // To link, the target mustn't exist. Make a backup, so we can restore it when linking fails.
        string path2Backup = $"{path2}{BackupSuffix}";
        File.Move(path2, path2Backup);

        int rv = SystemNative_Link(path1, path2);
        if (rv != 0)
        {
            // Created directly after the native call so that it captures the LastError.
            var ex = new Win32Exception();

            Log.LogError($"Unable to link '{path2}' to '{path1}': {ex}");

            File.Move(path2Backup, path2);

            return false;
        }

        File.Delete(path2Backup);

        Log.LogMessage(MessageImportance.Normal, $"Linked '{path1}' and '{path2}'.");

        return true;
    }

    // This native method is used by the runtime to create hard links. It is not exposed through a public .NET API.
    [DllImport("libSystem.Native", SetLastError = true)]
    private static extern int SystemNative_Link(string source, string link);
}
|
||||
}
|
|
@ -6,6 +6,7 @@
|
|||
<RootNamespace>Microsoft.DotNet.Cli.Build</RootNamespace>
|
||||
<DefineConstants Condition="'$(DotNetBuildFromSource)' == 'true'">$(DefineConstants);SOURCE_BUILD</DefineConstants>
|
||||
<DisableImplicitNamespaceImports>true</DisableImplicitNamespaceImports>
|
||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
|
|
@ -40,5 +40,6 @@
|
|||
<UsingTask TaskName="CollatePackageDownloads" AssemblyFile="$(CoreSdkTaskDll)"/>
|
||||
<UsingTask TaskName="GenerateSdkRuntimeIdentifierChain" AssemblyFile="$(CoreSdkTaskDll)"/>
|
||||
<UsingTask TaskName="GetDependencyInfo" AssemblyFile="$(CoreSdkTaskDll)"/>
|
||||
<UsingTask TaskName="RemoveDuplicateFiles" AssemblyFile="$(CoreSdkTaskDll)"/>
|
||||
|
||||
</Project>
|
||||
|
|
|
@ -567,14 +567,8 @@
|
|||
<!-- Replace duplicate files with hard links so that when the same files from a runtime pack
|
||||
and the corresponding shared frameworks are included in a distro package their data is shared instead of duplicated. -->
|
||||
<Target Name="ReplaceDuplicateFilesWithHardLinks" DependsOnTargets="LayoutBundledComponents"
|
||||
Condition="'$(BundleRuntimePacks)' == 'true'">
|
||||
<Exec Command="rdfind --help" StandardOutputImportance="low" StandardErrorImportance="low" IgnoreExitCode="True">
|
||||
<Output TaskParameter="ExitCode" PropertyName="RdFindInfoExitCode" />
|
||||
</Exec>
|
||||
|
||||
<Message Text="rdfind is not available to make hard links." Condition="'$(RdFindInfoExitCode)' != '0'" Importance="high" />
|
||||
|
||||
<Exec Command="rdfind -makehardlinks true -makeresultsfile false '$(RedistLayoutPath)'" Condition="'$(RdFindInfoExitCode)' == '0'" />
|
||||
Condition="'$(BundleRuntimePacks)' == 'true' and !$([MSBuild]::IsOSPlatform('WINDOWS'))">
|
||||
<RemoveDuplicateFiles Directory="$(RedistLayoutPath)" />
|
||||
</Target>
|
||||
|
||||
<Target Name="GenerateLayout"
|
||||
|
|
Loading…
Reference in a new issue