
Previously we'd keep track of any file that we extracted once and try to reuse that file (by copying it) if we needed the same file later at a different destination. The reason was that it's theoretically faster to do a file copy than a CreateFile and write, since the copy can happen entirely in the kernel. In practice we were foiled by AV scanners. This happens to be the only time during extraction where we let a file close after writing it and then try to use it again. Sure enough, on fast machines we were seeing that as soon as we closed the file, MsMpEng would map it for exclusive access, causing our copy to fail with a sharing violation. To fix this, I've removed the copy optimization and will just copy the file from the in-memory archive every time.
540 lines
20 KiB
C#
540 lines
20 KiB
C#
// Copyright (c) .NET Foundation and contributors. All rights reserved.
|
|
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
|
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.IO.Compression;
|
|
using System.Linq;
|
|
using System.Security.Cryptography;
|
|
using System.Text;
|
|
using System.Threading;
|
|
|
|
namespace Microsoft.DotNet.Archive
|
|
{
|
|
public class IndexedArchive : IDisposable
|
|
{
|
|
// One line of the archive index: where a file should end up and the hash
// that identifies its content.
private class DestinationFileInfo
{
    private readonly string _destinationPath;
    private readonly string _hash;

    public DestinationFileInfo(string destinationPath, string hash)
    {
        _hash = hash;
        _destinationPath = destinationPath;
    }

    // path written on the left-hand side of an index line
    public string DestinationPath => _destinationPath;

    // key used to look the content up in the hash -> archive file map
    public string Hash => _hash;
}
|
|
|
|
// A unique piece of content tracked by the archive: its (replaceable) data
// stream, the entry path it is stored under, and its hash.
private class ArchiveFileInfo
{
    private readonly string _archivePath;
    private readonly string _hash;

    public ArchiveFileInfo(Stream stream, string archivePath, string hash)
    {
        _archivePath = archivePath;
        _hash = hash;
        Stream = stream;
    }

    // content stream; may be swapped or set to null by the owning archive
    public Stream Stream { get; set; }

    public string ArchivePath => _archivePath;

    public string Hash => _hash;

    // file name portion of ArchivePath, without its extension
    public string FileName => Path.GetFileNameWithoutExtension(_archivePath);

    // extension portion of ArchivePath (including the leading '.')
    public string Extension => Path.GetExtension(_archivePath);

    // length of the current stream; requires Stream to be non-null
    public long Size => Stream.Length;
}
|
|
|
|
// extensions of files that are opened as zip archives and indexed entry by entry
private static string[] ZipExtensions = { ".zip", ".nupkg" };

// name of the archive entry that stores the destination|source index
private static string IndexFileName = "index.txt";

// maps file hash to archive path
// $ prefix indicates that the file is not in the archive and path is a hash
private Dictionary<string, ArchiveFileInfo> _archiveFiles =
    new Dictionary<string, ArchiveFileInfo>();

// maps file hash to external path
private Dictionary<string, string> _externalFiles =
    new Dictionary<string, string>();

// lists all extracted files & hashes
private List<DestinationFileInfo> _destFiles =
    new List<DestinationFileInfo>();

// intended to guard members against use after Dispose (see CheckDisposed)
private bool _disposed = false;

// one SHA256 instance per thread, created on first use on that thread
private ThreadLocal<SHA256> _sha =
    new ThreadLocal<SHA256>(() => SHA256.Create());
|
|
|
|
// Creates an archive with nothing added yet; all state comes from the
// field initializers.
public IndexedArchive()
{
}
|
|
|
|
// Creates a uniquely named (GUID) file in the temp directory and returns it
// as a stream that is opened with FileOptions.DeleteOnClose.
private static Stream CreateTemporaryStream()
{
    string uniqueName = Guid.NewGuid().ToString();
    string fullPath = Path.Combine(Path.GetTempPath(), uniqueName);

    return File.Create(fullPath, 4096, FileOptions.DeleteOnClose);
}
|
|
|
|
// Same idea as CreateTemporaryStream, but returns the FileStream itself and
// opens it with FileShare.Read | FileShare.Delete so the file can be opened
// again for reading while this handle is still alive.
private static FileStream CreateTemporaryFileStream()
{
    string uniqueName = Guid.NewGuid().ToString();
    string fullPath = Path.Combine(Path.GetTempPath(), uniqueName);

    return new FileStream(
        fullPath,
        FileMode.Create,
        FileAccess.ReadWrite,
        FileShare.Read | FileShare.Delete,
        4096,
        FileOptions.DeleteOnClose);
}
|
|
|
|
// Writes everything that has been added to this archive to archivePath.
// The zip is first built in a temporary stream, then that stream is passed
// through CompressionUtility.Compress into the destination file.
public void Save(string archivePath, IProgress<ProgressReport> progress)
{
    CheckDisposed();

    using (Stream zipStream = CreateTemporaryStream())
    {
        // leaveOpen: true - the temp stream must outlive the ZipArchive so it
        // can be re-read below; disposing the ZipArchive finishes the zip.
        using (ZipArchive zip = new ZipArchive(zipStream, ZipArchiveMode.Create, true))
        {
            BuildArchive(zip, progress);
        }

        // rewind so compression starts from the first byte of the zip
        zipStream.Seek(0, SeekOrigin.Begin);

        using (FileStream output = File.Create(archivePath))
        {
            CompressionUtility.Compress(zipStream, output, progress);
        }
    }
}
|
|
|
|
// Writes the index entry followed by one entry per unique file into archive.
//
// Index format: one line per destination, "<destinationPath>|<archivePath>".
// A source that is not stored in the archive (its ArchiveFileInfo has no
// stream) is written with a single leading '$' followed by its hash, which is
// the form ExtractSource resolves against the external file map.
//
// Every stream that is archived is consumed: it is disposed and set to null
// once its content has been copied.
private void BuildArchive(ZipArchive archive, IProgress<ProgressReport> progress)
{
    // write the file index
    var indexEntry = archive.CreateEntry(IndexFileName, CompressionLevel.NoCompression);

    using (var stream = indexEntry.Open())
    using (var textWriter = new StreamWriter(stream))
    {
        foreach (var entry in _destFiles)
        {
            var archiveFile = _archiveFiles[entry.Hash];
            string archivePath = archiveFile.ArchivePath;

            // AddExternalFile already stores the path as "$" + hash; only add
            // the marker when it is missing so the index never contains "$$".
            if (archiveFile.Stream == null && !archivePath.StartsWith("$", StringComparison.Ordinal))
            {
                archivePath = "$" + archivePath;
            }

            textWriter.WriteLine($"{entry.DestinationPath}|{archivePath}");
        }
    }

    // Only files that actually have content are stored; external files have
    // no stream and must not be sorted by size or copied.
    var filesToArchive = _archiveFiles.Values.Where(f => f.Stream != null).ToList();

    // sort the files so that similar files are close together
    filesToArchive.Sort((f1, f2) =>
    {
        // first sort by extension
        var comp = String.Compare(f1.Extension, f2.Extension, StringComparison.OrdinalIgnoreCase);

        if (comp == 0)
        {
            // then sort by filename
            comp = String.Compare(f1.FileName, f2.FileName, StringComparison.OrdinalIgnoreCase);
        }

        if (comp == 0)
        {
            // sort by file size (helps differentiate ref/lib/facade)
            comp = f1.Size.CompareTo(f2.Size);
        }

        if (comp == 0)
        {
            // finally sort by full archive path so we have stable output
            comp = String.Compare(f1.ArchivePath, f2.ArchivePath, StringComparison.OrdinalIgnoreCase);
        }

        return comp;
    });

    int filesAdded = 0;

    // add all the files
    foreach (var fileToArchive in filesToArchive)
    {
        var entry = archive.CreateEntry(fileToArchive.ArchivePath, CompressionLevel.NoCompression);
        using (var entryStream = entry.Open())
        {
            fileToArchive.Stream.CopyTo(entryStream);

            // the content now lives in the zip; release the source stream
            fileToArchive.Stream.Dispose();
            fileToArchive.Stream = null;
        }

        progress.Report("Archiving files", ++filesAdded, filesToArchive.Count);
    }
}
|
|
|
|
// Base class for one unit of extraction work that produces the file at
// DestinationPath. Subclasses implement Execute to write the file.
private abstract class ExtractOperation
{
    public ExtractOperation(string destinationPath)
    {
        DestinationPath = destinationPath;
    }

    // full path of the file this operation creates
    public string DestinationPath { get; }

    // Makes sure the destination's parent directory exists, then runs Execute.
    public virtual void DoOperation()
    {
        string directory = Path.GetDirectoryName(DestinationPath);

        // GetDirectoryName yields null/empty when the path has no directory
        // part (e.g. a bare file name); CreateDirectory would throw on that.
        // CreateDirectory is a no-op for directories that already exist, so no
        // separate Exists check is needed (operations run in parallel, and a
        // check-then-create pair would be racy anyway).
        if (!string.IsNullOrEmpty(directory))
        {
            Directory.CreateDirectory(directory);
        }

        Execute();
    }

    // Writes the file at DestinationPath; the parent directory already exists.
    protected abstract void Execute();
}
|
|
|
|
// Extraction step that materializes a single source as a single file.
private class CopyOperation : ExtractOperation
{
    public CopyOperation(ExtractSource source, string destinationPath)
        : base(destinationPath)
    {
        Source = source;
    }

    // where the content comes from
    public ExtractSource Source { get; }

    protected override void Execute()
    {
        string localPath = Source.LocalPath;

        if (localPath == null)
        {
            // no file on disk for this source: stream the content into a new file
            using (FileStream output = File.Create(DestinationPath))
            {
                Source.CopyToStream(output);
            }

            return;
        }

        // the source is a file on disk: copy it, overwriting any existing destination
        File.Copy(localPath, DestinationPath, true);
    }
}
|
|
|
|
// Extraction step that rebuilds a zip file at DestinationPath from a list of
// (entry name, source) pairs collected through AddEntry.
private class ZipOperation : ExtractOperation
{
    // entries in the order they were added
    private List<Tuple<string, ExtractSource>> _pending = new List<Tuple<string, ExtractSource>>();

    public ZipOperation(string destinationPath)
        : base(destinationPath)
    {
    }

    // Queues one entry to be written when the operation executes.
    public void AddEntry(string entryName, ExtractSource source)
    {
        _pending.Add(new Tuple<string, ExtractSource>(entryName, source));
    }

    protected override void Execute()
    {
        using (FileStream output = File.Create(DestinationPath))
        using (ZipArchive zip = new ZipArchive(output, ZipArchiveMode.Create))
        {
            foreach (Tuple<string, ExtractSource> pending in _pending)
            {
                string entryName = pending.Item1;
                ExtractSource content = pending.Item2;

                ZipArchiveEntry zipEntry = zip.CreateEntry(entryName, CompressionLevel.Optimal);
                using (Stream entryStream = zipEntry.Open())
                {
                    content.CopyToStream(entryStream);
                }
            }
        }
    }
}
|
|
|
|
// Resolves the right-hand side of an index line to actual content.
//
// A source string starting with '$' names an external file by hash and is
// looked up in the supplied hash -> path map; anything else is the name of
// an entry in the archive.
private class ExtractSource
{
    private readonly string _entryName;
    private readonly string _localPath;
    private readonly ThreadLocalZipArchive _archive;

    // Throws Exception when sourceString refers to an external hash that is
    // not present in externalFiles.
    public ExtractSource(string sourceString, Dictionary<string, string> externalFiles, ThreadLocalZipArchive archive)
    {
        if (sourceString[0] == '$')
        {
            var externalHash = sourceString.Substring(1);
            if (!externalFiles.TryGetValue(externalHash, out _localPath))
            {
                // interpolated so the message names the hash that was missing
                throw new Exception($"Could not find external file with hash {externalHash}.");
            }
        }
        else
        {
            _entryName = sourceString;
            _archive = archive;
        }
    }

    // path of the external file on disk, or null when the content is an archive entry
    public string LocalPath { get { return _localPath; } }

    // Copies the whole content of this source into destinationStream.
    public void CopyToStream(Stream destinationStream)
    {
        if (_localPath != null)
        {
            using (var sourceStream = File.OpenRead(_localPath))
            {
                sourceStream.CopyTo(destinationStream);
            }
        }
        else
        {
            using (var sourceStream = _archive.Archive.GetEntry(_entryName).Open())
            {
                sourceStream.CopyTo(destinationStream);
            }
        }
    }
}
|
|
|
|
// separator between destination and source on an index line
private static readonly char[] pipeSeperator = new[] { '|' };

// Expands the archive at compressedArchivePath into outputDirectory.
//
// The compressed file is first decompressed into a temporary file, which is
// then read as a zip. The index entry is parsed into a list of operations
// (plain file copies, plus one operation per destination zip file), and the
// operations are executed in parallel.
//
// Throws InvalidDataException when the archive has no index entry, and
// Exception when an index line is malformed or names an unknown external file.
//
// NOTE(review): destination paths from the index are combined with
// outputDirectory without checking that the result stays inside it; confirm
// archives are always trusted.
public void Extract(string compressedArchivePath, string outputDirectory, IProgress<ProgressReport> progress)
{
    using (var archiveStream = CreateTemporaryFileStream())
    {
        // decompress the LZMA stream
        using (var lzmaStream = File.OpenRead(compressedArchivePath))
        {
            CompressionUtility.Decompress(lzmaStream, archiveStream, progress);
        }

        // path of the temporary file, handed to ThreadLocalZipArchive below
        var archivePath = archiveStream.Name;

        // reset the uncompressed stream
        archiveStream.Seek(0, SeekOrigin.Begin);

        // read as a zip archive
        using (var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read))
        using (var tlArchive = new ThreadLocalZipArchive(archivePath, archive))
        {
            List<ExtractOperation> extractOperations = new List<ExtractOperation>();
            Dictionary<string, ExtractSource> sourceCache = new Dictionary<string, ExtractSource>();

            // process the index to determine all extraction operations
            var indexEntry = archive.GetEntry(IndexFileName);
            if (indexEntry == null)
            {
                // GetEntry returns null for a missing entry; fail with a clear message
                throw new InvalidDataException($"Archive does not contain the required '{IndexFileName}' entry.");
            }

            using (var indexReader = new StreamReader(indexEntry.Open()))
            {
                Dictionary<string, ZipOperation> zipOperations = new Dictionary<string, ZipOperation>(StringComparer.OrdinalIgnoreCase);
                for (var line = indexReader.ReadLine(); line != null; line = indexReader.ReadLine())
                {
                    var lineParts = line.Split(pipeSeperator);
                    if (lineParts.Length != 2)
                    {
                        // covers both too few and too many separators
                        throw new Exception($"Unexpected index line format, expected exactly one '|' but found {lineParts.Length - 1}: '{line}'.");
                    }

                    string target = lineParts[0];
                    string source = lineParts[1];

                    // one ExtractSource per distinct source string
                    ExtractSource extractSource;
                    if (!sourceCache.TryGetValue(source, out extractSource))
                    {
                        sourceCache[source] = extractSource = new ExtractSource(source, _externalFiles, tlArchive);
                    }

                    // "<zip path>::<entry name>" marks a file that lives inside a zip
                    var zipSeperatorIndex = target.IndexOf("::", StringComparison.OrdinalIgnoreCase);

                    if (zipSeperatorIndex != -1)
                    {
                        string zipRelativePath = target.Substring(0, zipSeperatorIndex);
                        string zipEntryName = target.Substring(zipSeperatorIndex + 2);
                        string destinationPath = Path.Combine(outputDirectory, zipRelativePath);

                        // operations on a zip file will be sequential
                        ZipOperation currentZipOperation;

                        if (!zipOperations.TryGetValue(destinationPath, out currentZipOperation))
                        {
                            extractOperations.Add(currentZipOperation = new ZipOperation(destinationPath));
                            zipOperations.Add(destinationPath, currentZipOperation);
                        }
                        currentZipOperation.AddEntry(zipEntryName, extractSource);
                    }
                    else
                    {
                        string destinationPath = Path.Combine(outputDirectory, target);
                        extractOperations.Add(new CopyOperation(extractSource, destinationPath));
                    }
                }
            }

            int opsExecuted = 0;

            // execute all operations
            extractOperations.AsParallel().ForAll(extractOperation =>
            {
                extractOperation.DoOperation();
                progress.Report("Expanding", Interlocked.Increment(ref opsExecuted), extractOperations.Count);
            });
        }
    }
}
|
|
|
|
// Registers every file under externalDirectory (all subdirectories included)
// as an external file.
public void AddExternalDirectory(string externalDirectory)
{
    CheckDisposed();

    IEnumerable<string> candidates =
        Directory.EnumerateFiles(externalDirectory, "*", SearchOption.AllDirectories);

    foreach (string candidate in candidates)
    {
        AddExternalFile(candidate);
    }
}
|
|
|
|
// Registers a file on disk as an external source: its hash is recorded with
// no stream (content is not stored in the archive) and mapped to its path.
public void AddExternalFile(string externalFile)
{
    CheckDisposed();

    string hash;
    using (var fs = File.OpenRead(externalFile))
    {
        hash = GetHash(fs);
    }

    // same lock AddStream takes when it touches these collections
    lock (_archiveFiles)
    {
        // if content with this hash was already added with a stream, release
        // that stream before the entry is replaced so it is not leaked
        ArchiveFileInfo existing;
        if (_archiveFiles.TryGetValue(hash, out existing) && existing.Stream != null)
        {
            existing.Stream.Dispose();
            existing.Stream = null;
        }

        // $ prefix indicates that the file is not in the archive and path is a hash
        _archiveFiles[hash] = new ArchiveFileInfo(null, "$" + hash, hash);
        _externalFiles[hash] = externalFile;
    }
}
|
|
// Adds every file under sourceDirectory (all subdirectories included), in
// parallel. Each file's destination is its path relative to sourceDirectory,
// placed under destinationDirectory when one is given. Files with a zip
// extension (see ZipExtensions) are added entry by entry via AddZip; all
// others are added via AddFile. Progress is reported after each file.
public void AddDirectory(string sourceDirectory, IProgress<ProgressReport> progress, string destinationDirectory = null)
{
    var sourceFiles = Directory.EnumerateFiles(sourceDirectory, "*", SearchOption.AllDirectories).ToArray();

    // Number of leading characters to strip from each enumerated path. The
    // separator that follows the directory name is only "extra" when
    // sourceDirectory does not already end with one; otherwise the first
    // character of the relative path would be cut off.
    int prefixLength = sourceDirectory.Length;
    if (prefixLength > 0)
    {
        char last = sourceDirectory[prefixLength - 1];
        if (last != Path.DirectorySeparatorChar && last != Path.AltDirectorySeparatorChar)
        {
            prefixLength++;
        }
    }

    int filesAdded = 0;
    sourceFiles.AsParallel().ForAll(sourceFile =>
    {
        // path relative to the destination/extracted directory to write the file
        string destinationRelativePath = sourceFile.Substring(prefixLength);

        if (destinationDirectory != null)
        {
            destinationRelativePath = Path.Combine(destinationDirectory, destinationRelativePath);
        }

        string extension = Path.GetExtension(sourceFile);

        if (ZipExtensions.Any(ze => ze.Equals(extension, StringComparison.OrdinalIgnoreCase)))
        {
            AddZip(sourceFile, destinationRelativePath);
        }
        else
        {
            AddFile(sourceFile, destinationRelativePath);
        }

        progress.Report($"Adding {sourceDirectory}", Interlocked.Increment(ref filesAdded), sourceFiles.Length);
    });
}
|
|
|
|
// Adds each entry of the zip at sourceZipFile individually. An entry's
// destination is recorded as "<destinationZipFile>::<entry full name>".
public void AddZip(string sourceZipFile, string destinationZipFile)
{
    // The file stream gets its own using so it is closed even when
    // constructing the ZipArchive throws (e.g. the file is not a valid zip).
    using (var sourceStream = File.OpenRead(sourceZipFile))
    using (var sourceArchive = new ZipArchive(sourceStream, ZipArchiveMode.Read))
    {
        foreach (var entry in sourceArchive.Entries)
        {
            // we can dispose this stream, if AddStream uses it, it will make a copy.
            using (var stream = entry.Open())
            {
                string destinationPath = $"{destinationZipFile}::{entry.FullName}";
                AddStream(stream, destinationPath);
            }
        }
    }
}
|
|
|
|
// Adds the file at sourceFilePath to the archive under destinationPath.
public void AddFile(string sourceFilePath, string destinationPath)
{
    // The file is only read, so open it read-only with read sharing rather
    // than File.Open(path, FileMode.Open), which asks for read/write access
    // with no sharing and fails on read-only files.
    var stream = File.OpenRead(sourceFilePath);

    try
    {
        // lifetime of this stream is managed by AddStream
        AddStream(stream, destinationPath);
    }
    catch
    {
        // AddStream did not complete, so make sure the handle is not leaked
        stream.Dispose();
        throw;
    }
}
|
|
|
|
// Records that destinationPath should receive the content of stream.
//
// The content is identified by its hash. If that hash is new, the stream is
// kept (rewound) as the content for a new archive entry. If it is already
// known, the incoming stream is either disposed or - when the known stream
// is a MemoryStream and the incoming one is not - swapped in for it.
// A non-seekable stream is first copied to a temporary stream and disposed.
public void AddStream(Stream stream, string destinationPath)
{
    CheckDisposed();

    string hash;

    if (!stream.CanSeek)
    {
        // hashing consumes the stream and it has to be rewound afterwards,
        // so buffer non-seekable input into a seekable temporary stream
        Stream seekableCopy = CreateTemporaryStream();
        stream.CopyTo(seekableCopy);
        seekableCopy.Seek(0, SeekOrigin.Begin);
        hash = GetHash(seekableCopy);
        stream.Dispose();
        stream = seekableCopy;
    }
    else
    {
        hash = GetHash(stream);
    }

    lock (_archiveFiles)
    {
        _destFiles.Add(new DestinationFileInfo(destinationPath, hash));

        ArchiveFileInfo known;
        if (!_archiveFiles.TryGetValue(hash, out known))
        {
            // first time this content is seen: add a new entry
            stream.Seek(0, SeekOrigin.Begin);
            string entryPath = Path.Combine(hash, Path.GetFileName(destinationPath));
            _archiveFiles.Add(hash, new ArchiveFileInfo(stream, entryPath, hash));
            return;
        }

        // content already present in the archive/external set
        bool incomingIsMemory = stream is MemoryStream;
        bool knownIsMemory = known.Stream is MemoryStream;

        if (incomingIsMemory || !knownIsMemory)
        {
            // we already have a good stream, free this one.
            stream.Dispose();
            return;
        }

        // reduce memory pressure: drop the in-memory copy in favor of the new stream
        known.Stream.Dispose();
        stream.Seek(0, SeekOrigin.Begin);
        known.Stream = stream;
    }
}
|
|
|
|
// Computes the SHA256 of stream using the calling thread's hasher and
// returns it formatted by GetHashString.
public string GetHash(Stream stream)
{
    SHA256 hasher = _sha.Value;
    byte[] digest = hasher.ComputeHash(stream);

    return GetHashString(digest);
}
|
|
|
|
// Formats the bytes as a lowercase hex string, two characters per byte.
private static string GetHashString(byte[] hashBytes)
{
    var hex = new StringBuilder(hashBytes.Length * 2);

    for (int i = 0; i < hashBytes.Length; i++)
    {
        hex.Append(hashBytes[i].ToString("x2"));
    }

    return hex.ToString();
}
|
|
|
|
// Releases every stream still held for archive files and the per-thread
// hashers. Safe to call more than once; after the first call, members that
// use CheckDisposed throw ObjectDisposedException.
public void Dispose()
{
    if (_disposed)
    {
        return;
    }

    // mark first so the object reads as disposed from here on; without this
    // CheckDisposed could never fire
    _disposed = true;

    if (_archiveFiles != null)
    {
        foreach (var archiveFile in _archiveFiles.Values)
        {
            if (archiveFile.Stream != null)
            {
                archiveFile.Stream.Dispose();
                archiveFile.Stream = null;
            }
        }
    }

    if (_sha != null)
    {
        _sha.Dispose();
        _sha = null;
    }
}
|
|
|
|
// Throws ObjectDisposedException when the disposed flag is set; otherwise
// does nothing.
private void CheckDisposed()
{
    if (!_disposed)
    {
        return;
    }

    throw new ObjectDisposedException(nameof(IndexedArchive));
}
|
|
}
|
|
}
|