// Patch entry: src/Microsoft.DotNet.Archive/CompressionUtility.cs (new file, index 000000000..143ce38c3)
using SevenZip;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Microsoft.DotNet.Archive
{
    /// <summary>
    /// LZMA compression helpers. The stream layout produced by <see cref="Compress"/> and consumed by
    /// <see cref="Decompress"/> is: the coder properties (read back as 5 bytes), an 8-byte
    /// little-endian uncompressed length, then the LZMA payload.
    /// </summary>
    internal static class CompressionUtility
    {
        // Number of coder-property bytes Decompress expects at the start of the stream.
        private const int CoderPropertiesSize = 5;

        // Width, in bytes, of the little-endian uncompressed-length field.
        private const int LengthFieldSize = 8;

        /// <summary>Selects which of the coder's two counters is reported as progress.</summary>
        enum MeasureBy
        {
            Input,
            Output
        }

        /// <summary>
        /// Adapts the SevenZip <see cref="ICodeProgress"/> callback to an IProgress of ProgressReport.
        /// </summary>
        private class LzmaProgress : ICodeProgress
        {
            private readonly IProgress<ProgressReport> progress;
            private readonly long totalSize;
            private readonly string phase;
            private readonly MeasureBy measureBy;

            public LzmaProgress(IProgress<ProgressReport> progress, string phase, long totalSize, MeasureBy measureBy)
            {
                this.progress = progress;
                this.totalSize = totalSize;
                this.phase = phase;
                this.measureBy = measureBy;
            }

            /// <summary>Forwards either the input or the output byte count, measured against the total.</summary>
            public void SetProgress(long inSize, long outSize)
            {
                progress.Report(phase, measureBy == MeasureBy.Input ? inSize : outSize, totalSize);
            }
        }

        /// <summary>
        /// Compresses everything from the current position of <paramref name="inStream"/> to its end
        /// and writes the result to <paramref name="outStream"/>.
        /// </summary>
        /// <param name="inStream">Seekable source stream (its Length and Position are read).</param>
        /// <param name="outStream">Destination stream; its Length is read once encoding finishes.</param>
        /// <param name="progress">Receives "Compressing" reports measured in input bytes.</param>
        /// <exception cref="ArgumentNullException">A stream argument is null.</exception>
        public static void Compress(Stream inStream, Stream outStream, IProgress<ProgressReport> progress)
        {
            if (inStream == null)
            {
                throw new ArgumentNullException(nameof(inStream));
            }
            if (outStream == null)
            {
                throw new ArgumentNullException(nameof(outStream));
            }

            SevenZip.Compression.LZMA.Encoder encoder = new SevenZip.Compression.LZMA.Encoder();

            CoderPropID[] propIDs =
            {
                CoderPropID.DictionarySize,
                CoderPropID.PosStateBits,
                CoderPropID.LitContextBits,
                CoderPropID.LitPosBits,
                CoderPropID.Algorithm,
                CoderPropID.NumFastBytes,
                CoderPropID.MatchFinder,
                CoderPropID.EndMarker
            };
            object[] properties =
            {
                (Int32)(1 << 26),
                (Int32)(1),
                (Int32)(8),
                (Int32)(0),
                (Int32)(2),
                (Int32)(96),
                "bt4",
                false
            };

            encoder.SetCoderProperties(propIDs, properties);
            encoder.WriteCoderProperties(outStream);

            // Only the bytes from the current position onward are encoded, so that is the length
            // the header must advertise (identical to Length when the stream is at its start).
            Int64 inSize = inStream.Length - inStream.Position;
            for (int i = 0; i < LengthFieldSize; i++)
            {
                outStream.WriteByte((Byte)(inSize >> (8 * i)));
            }

            var lzmaProgress = new LzmaProgress(progress, "Compressing", inSize, MeasureBy.Input);
            lzmaProgress.SetProgress(0, 0);
            encoder.Code(inStream, outStream, -1, -1, lzmaProgress);
            lzmaProgress.SetProgress(inSize, outStream.Length);
        }

        /// <summary>
        /// Reads the header written by <see cref="Compress"/> from <paramref name="inStream"/> and
        /// decodes the remaining bytes into <paramref name="outStream"/>.
        /// </summary>
        /// <param name="inStream">Seekable compressed stream (its Length and Position are read).</param>
        /// <param name="outStream">Destination for the decoded bytes.</param>
        /// <param name="progress">Receives "Decompressing" reports measured in output bytes.</param>
        /// <exception cref="ArgumentNullException">A stream argument is null.</exception>
        /// <exception cref="EndOfStreamException">The stream ends before the header is complete.</exception>
        public static void Decompress(Stream inStream, Stream outStream, IProgress<ProgressReport> progress)
        {
            if (inStream == null)
            {
                throw new ArgumentNullException(nameof(inStream));
            }
            if (outStream == null)
            {
                throw new ArgumentNullException(nameof(outStream));
            }

            byte[] properties = new byte[CoderPropertiesSize];

            // Stream.Read may legally return fewer bytes than requested, so keep reading until the
            // buffer is full or the stream really ends.
            if (ReadFully(inStream, properties) != properties.Length)
            {
                throw new EndOfStreamException("input .lzma is too short");
            }

            SevenZip.Compression.LZMA.Decoder decoder = new SevenZip.Compression.LZMA.Decoder();
            decoder.SetDecoderProperties(properties);

            long outSize = 0;
            for (int i = 0; i < LengthFieldSize; i++)
            {
                int v = inStream.ReadByte();
                if (v < 0)
                {
                    throw new EndOfStreamException("Can't Read 1");
                }
                outSize |= ((long)(byte)v) << (8 * i);
            }

            long compressedSize = inStream.Length - inStream.Position;
            var lzmaProgress = new LzmaProgress(progress, "Decompressing", outSize, MeasureBy.Output);
            lzmaProgress.SetProgress(0, 0);
            decoder.Code(inStream, outStream, compressedSize, outSize, lzmaProgress);
            lzmaProgress.SetProgress(inStream.Length, outSize);
        }

        // Fills the buffer from the stream; returns the number of bytes actually read, which is
        // less than buffer.Length only when the stream ended early.
        private static int ReadFully(Stream stream, byte[] buffer)
        {
            int total = 0;
            while (total < buffer.Length)
            {
                int read = stream.Read(buffer, total, buffer.Length - total);
                if (read == 0)
                {
                    break;
                }
                total += read;
            }
            return total;
        }
    }
}
// Patch entry: src/Microsoft.DotNet.Archive/IndexedArchive.cs (new file, index 000000000..30a85e8eb)
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace Microsoft.DotNet.Archive
{
    /// <summary>
    /// Builds and extracts an LZMA-compressed zip whose payload entries are de-duplicated by the
    /// SHA-256 hash of their content. An "index.txt" entry holds one "destination|source" line per
    /// output file. A source starting with '$' names an external file by hash instead of an archive
    /// entry; a destination of the form "zipPath::entryName" is re-packed into a zip on extraction.
    /// </summary>
    public class IndexedArchive : IDisposable
    {
        /// <summary>A file to produce on extraction, and the hash of the content that fills it.</summary>
        private class DestinationFileInfo
        {
            public DestinationFileInfo(string destinationPath, string hash)
            {
                DestinationPath = destinationPath;
                Hash = hash;
            }

            public string DestinationPath { get; }
            public string Hash { get; }
        }

        /// <summary>Unique content tracked by the archive; Stream is null for external files.</summary>
        private class ArchiveFileInfo
        {
            public ArchiveFileInfo(Stream stream, string archivePath, string hash)
            {
                Stream = stream;
                ArchivePath = archivePath;
                Hash = hash;
            }

            public Stream Stream { get; set; }
            public string ArchivePath { get; }
            public string Hash { get; }
            public string FileName { get { return Path.GetFileNameWithoutExtension(ArchivePath); } }
            public string Extension { get { return Path.GetExtension(ArchivePath); } }

            // Null-safe so that entries without content never break sorting.
            public long Size { get { return Stream == null ? 0 : Stream.Length; } }
        }

        private static readonly string[] ZipExtensions = new[] { ".zip", ".nupkg" };
        private const string IndexFileName = "index.txt";

        // Prefix marking an index source that is an external file (identified by hash) rather than
        // an entry stored inside the archive.
        private const string ExternalPrefix = "$";

        private static readonly char[] s_pipeSeparator = new[] { '|' };

        // maps file hash to archive path; also used as the lock guarding all three collections
        private readonly Dictionary<string, ArchiveFileInfo> archiveFiles = new Dictionary<string, ArchiveFileInfo>();
        // maps file hash to external path
        private readonly Dictionary<string, string> externalFiles = new Dictionary<string, string>();
        // lists all extracted files & hashes
        private readonly List<DestinationFileInfo> destFiles = new List<DestinationFileInfo>();
        private bool disposed = false;
        // One hasher per thread because files are added from PLINQ workers; values are tracked so
        // Dispose can release every instance.
        private ThreadLocal<SHA256> sha = new ThreadLocal<SHA256>(() => SHA256.Create(), trackAllValues: true);

        public IndexedArchive()
        {
        }

        // Creates a delete-on-close scratch file used to buffer non-seekable input.
        private static Stream CreateTemporaryStream()
        {
            string temp = Path.GetTempPath();
            string tempFile = Path.Combine(temp, Guid.NewGuid().ToString());
            return File.Create(tempFile, 4096, FileOptions.DeleteOnClose);
        }

        // Creates a delete-on-close scratch file that other handles may open for reading, which is
        // what lets extraction open the same zip once per thread.
        private static FileStream CreateTemporaryFileStream()
        {
            string temp = Path.GetTempPath();
            string tempFile = Path.Combine(temp, Guid.NewGuid().ToString());
            return new FileStream(tempFile, FileMode.Create, FileAccess.ReadWrite, FileShare.Read | FileShare.Delete, 4096, FileOptions.DeleteOnClose);
        }

        /// <summary>
        /// Writes every added file to an intermediate zip and LZMA-compresses that zip to
        /// <paramref name="archivePath"/>. Content streams are consumed, so an instance can be
        /// saved only once.
        /// </summary>
        /// <param name="archivePath">Path of the compressed archive to create.</param>
        /// <param name="progress">Receives "Archiving files" and "Compressing" reports.</param>
        public void Save(string archivePath, IProgress<ProgressReport> progress)
        {
            CheckDisposed();

            // NOTE(review): the intermediate "<archivePath>.zip" is left on disk afterwards; this
            // looks like a debugging aid (a temporary stream would avoid it) - confirm before changing.
            using (var archiveStream = File.Create(archivePath + ".zip"))
            {
                using (var archive = new ZipArchive(archiveStream, ZipArchiveMode.Create, true))
                {
                    BuildArchive(archive, progress);
                } // close archive so the central directory is flushed before compressing

                archiveStream.Seek(0, SeekOrigin.Begin);

                using (var lzmaStream = File.Create(archivePath))
                {
                    CompressionUtility.Compress(archiveStream, lzmaStream, progress);
                }
            } // close archiveStream
        }

        // Writes the index entry followed by one uncompressed entry per unique piece of content.
        private void BuildArchive(ZipArchive archive, IProgress<ProgressReport> progress)
        {
            // write the file index
            var indexEntry = archive.CreateEntry(IndexFileName, CompressionLevel.NoCompression);

            using (var stream = indexEntry.Open())
            using (var textWriter = new StreamWriter(stream))
            {
                foreach (var entry in destFiles)
                {
                    var archiveFile = archiveFiles[entry.Hash];

                    if (archiveFile.Stream == null && !externalFiles.ContainsKey(entry.Hash))
                    {
                        // Only external files may legitimately lack a stream; anything else means
                        // the content was already consumed by an earlier Save.
                        throw new InvalidOperationException($"Content for '{entry.DestinationPath}' is no longer available; the archive has already been saved.");
                    }

                    // External entries already carry the '$' prefix in ArchivePath, so the path is
                    // written as-is (prefixing again would yield "$$hash", which Extract cannot resolve).
                    textWriter.WriteLine($"{entry.DestinationPath}|{archiveFile.ArchivePath}");
                }
            }

            // External files have no content to store, so only stream-backed entries are archived.
            var filesToArchive = archiveFiles.Values.Where(f => f.Stream != null).ToList();

            // sort the files so that similar files are close together
            filesToArchive.Sort((f1, f2) =>
            {
                // first sort by extension
                var comp = String.Compare(f1.Extension, f2.Extension, StringComparison.OrdinalIgnoreCase);

                if (comp == 0)
                {
                    // then sort by filename
                    comp = String.Compare(f1.FileName, f2.FileName, StringComparison.OrdinalIgnoreCase);
                }

                if (comp == 0)
                {
                    // sort by file size (helps differentiate ref/lib/facade)
                    comp = f1.Size.CompareTo(f2.Size);
                }

                if (comp == 0)
                {
                    // finally sort by full archive path so we have stable output
                    comp = String.Compare(f1.ArchivePath, f2.ArchivePath, StringComparison.OrdinalIgnoreCase);
                }

                return comp;
            });

            int filesAdded = 0;
            // add all the files
            foreach (var fileToArchive in filesToArchive)
            {
                var entry = archive.CreateEntry(fileToArchive.ArchivePath, CompressionLevel.NoCompression);
                using (var entryStream = entry.Open())
                {
                    fileToArchive.Stream.CopyTo(entryStream);
                }

                // release the source as soon as it has been copied
                fileToArchive.Stream.Dispose();
                fileToArchive.Stream = null;

                progress.Report("Archiving files", ++filesAdded, filesToArchive.Count);
            }
        }

        /// <summary>One unit of extraction work that produces a single file at DestinationPath.</summary>
        private abstract class ExtractOperation
        {
            public ExtractOperation(string destinationPath)
            {
                DestinationPath = destinationPath;
            }

            public string DestinationPath { get; }

            public virtual void DoOperation()
            {
                string directory = Path.GetDirectoryName(DestinationPath);

                // GetDirectoryName yields null/empty for a bare file name; CreateDirectory is a
                // no-op when the directory already exists.
                if (!string.IsNullOrEmpty(directory))
                {
                    Directory.CreateDirectory(directory);
                }

                Execute();
            }

            protected abstract void Execute();
        }

        /// <summary>Writes a single source to a plain file.</summary>
        private class CopyOperation : ExtractOperation
        {
            public CopyOperation(ExtractSource source, string destinationPath) : base(destinationPath)
            {
                Source = source;
            }

            public ExtractSource Source { get; }

            protected override void Execute()
            {
                string localPath = Source.LocalPath;
                if (localPath != null)
                {
                    File.Copy(localPath, DestinationPath, true);
                    return;
                }

                using (var destinationStream = File.Create(DestinationPath))
                {
                    Source.CopyToStream(destinationStream);
                }

                // Publish the extracted copy only after the file handle is closed, so operations
                // running on other threads never try to read a file that is still open for writing.
                Source.SetLocalPath(DestinationPath);
            }
        }

        /// <summary>Re-packs a set of sources into one zip file.</summary>
        private class ZipOperation : ExtractOperation
        {
            public ZipOperation(string destinationPath) : base(destinationPath)
            {
            }

            private readonly List<Tuple<string, ExtractSource>> entries = new List<Tuple<string, ExtractSource>>();

            public void AddEntry(string entryName, ExtractSource source)
            {
                entries.Add(Tuple.Create(entryName, source));
            }

            protected override void Execute()
            {
                using (var archiveStream = File.Create(DestinationPath))
                using (var archive = new ZipArchive(archiveStream, ZipArchiveMode.Create))
                {
                    foreach (var zipSource in entries)
                    {
                        var entry = archive.CreateEntry(zipSource.Item1, CompressionLevel.Optimal);
                        using (var entryStream = entry.Open())
                        {
                            zipSource.Item2.CopyToStream(entryStream);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Content referenced by an index line: an entry inside the archive, or a file on disk
        /// (an external file, or a copy already extracted by an earlier operation).
        /// </summary>
        private class ExtractSource
        {
            private readonly string _entryName;
            private readonly ThreadLocalZipArchive _archive;
            // Written by one extraction thread and read by others, hence volatile.
            private volatile string _localPath;

            public ExtractSource(string sourceString, Dictionary<string, string> externalFiles, ThreadLocalZipArchive archive)
            {
                if (sourceString.StartsWith(ExternalPrefix, StringComparison.Ordinal))
                {
                    var externalHash = sourceString.Substring(ExternalPrefix.Length);
                    string localPath;
                    if (!externalFiles.TryGetValue(externalHash, out localPath))
                    {
                        throw new InvalidOperationException($"Could not find external file with hash {externalHash}.");
                    }
                    _localPath = localPath;
                }
                else
                {
                    _entryName = sourceString;
                    _archive = archive;
                }
            }

            public string LocalPath { get { return _localPath; } }

            // Records a fully written on-disk copy so later operations copy the file instead of
            // reading the archive entry again.
            public void SetLocalPath(string localPath)
            {
                _localPath = localPath;
            }

            public void CopyToStream(Stream destinationStream)
            {
                string localPath = _localPath;
                if (localPath != null)
                {
                    using (var sourceStream = File.OpenRead(localPath))
                    {
                        sourceStream.CopyTo(destinationStream);
                    }
                    return;
                }

                // ZipArchive is not thread safe, so each thread reads through its own archive
                // instance supplied by ThreadLocalZipArchive.
                var entry = _archive.Archive.GetEntry(_entryName);
                if (entry == null)
                {
                    throw new InvalidDataException($"Archive entry '{_entryName}' referenced by the index is missing.");
                }

                using (var sourceStream = entry.Open())
                {
                    sourceStream.CopyTo(destinationStream);
                }
            }
        }

        /// <summary>
        /// Decompresses <paramref name="compressedArchivePath"/> and recreates every indexed file
        /// under <paramref name="outputDirectory"/>, running the operations in parallel.
        /// </summary>
        /// <param name="compressedArchivePath">Archive produced by <see cref="Save"/>.</param>
        /// <param name="outputDirectory">Directory that destination paths are combined with.</param>
        /// <param name="progress">Receives "Decompressing" and "Expanding" reports.</param>
        /// <exception cref="InvalidDataException">The index is missing or malformed.</exception>
        public void Extract(string compressedArchivePath, string outputDirectory, IProgress<ProgressReport> progress)
        {
            using (var archiveStream = CreateTemporaryFileStream())
            {
                // decompress the LZMA stream
                using (var lzmaStream = File.OpenRead(compressedArchivePath))
                {
                    CompressionUtility.Decompress(lzmaStream, archiveStream, progress);
                }

                var archivePath = archiveStream.Name;

                // reset the uncompressed stream
                archiveStream.Seek(0, SeekOrigin.Begin);

                // read as a zip archive
                using (var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read))
                using (var tlArchive = new ThreadLocalZipArchive(archivePath, archive))
                {
                    List<ExtractOperation> extractOperations = new List<ExtractOperation>();
                    Dictionary<string, ExtractSource> sourceCache = new Dictionary<string, ExtractSource>();

                    // process the index to determine all extraction operations
                    var indexEntry = archive.GetEntry(IndexFileName);
                    if (indexEntry == null)
                    {
                        throw new InvalidDataException($"Archive does not contain the '{IndexFileName}' index entry.");
                    }

                    using (var indexReader = new StreamReader(indexEntry.Open()))
                    {
                        Dictionary<string, ZipOperation> zipOperations = new Dictionary<string, ZipOperation>(StringComparer.OrdinalIgnoreCase);
                        for (var line = indexReader.ReadLine(); line != null; line = indexReader.ReadLine())
                        {
                            var lineParts = line.Split(s_pipeSeparator);
                            if (lineParts.Length != 2 || lineParts[0].Length == 0 || lineParts[1].Length == 0)
                            {
                                throw new InvalidDataException($"Unexpected index line format, expected 'destination|source' but found '{line}'.");
                            }

                            string target = lineParts[0];
                            string source = lineParts[1];

                            // identical sources share one ExtractSource so an extracted copy can be reused
                            ExtractSource extractSource;
                            if (!sourceCache.TryGetValue(source, out extractSource))
                            {
                                sourceCache[source] = extractSource = new ExtractSource(source, externalFiles, tlArchive);
                            }

                            var zipSeparatorIndex = target.IndexOf("::", StringComparison.Ordinal);

                            if (zipSeparatorIndex != -1)
                            {
                                string zipRelativePath = target.Substring(0, zipSeparatorIndex);
                                string zipEntryName = target.Substring(zipSeparatorIndex + 2);
                                string destinationPath = Path.Combine(outputDirectory, zipRelativePath);

                                // operations on a zip file will be sequential
                                ZipOperation currentZipOperation;

                                if (!zipOperations.TryGetValue(destinationPath, out currentZipOperation))
                                {
                                    extractOperations.Add(currentZipOperation = new ZipOperation(destinationPath));
                                    zipOperations.Add(destinationPath, currentZipOperation);
                                }
                                currentZipOperation.AddEntry(zipEntryName, extractSource);
                            }
                            else
                            {
                                string destinationPath = Path.Combine(outputDirectory, target);
                                extractOperations.Add(new CopyOperation(extractSource, destinationPath));
                            }
                        }
                    }

                    int opsExecuted = 0;
                    // execute all operations
                    extractOperations.AsParallel().ForAll(extractOperation =>
                    {
                        extractOperation.DoOperation();
                        progress.Report("Expanding", Interlocked.Increment(ref opsExecuted), extractOperations.Count);
                    });
                }
            }
        }

        /// <summary>Registers every file under <paramref name="externalDirectory"/> as external content.</summary>
        public void AddExternalDirectory(string externalDirectory)
        {
            CheckDisposed();
            foreach (var externalFile in Directory.EnumerateFiles(externalDirectory, "*", SearchOption.AllDirectories))
            {
                AddExternalFile(externalFile);
            }
        }

        /// <summary>
        /// Registers a file whose content is not stored in the archive; destinations with the same
        /// hash are indexed as "$hash" and resolved against this path on extraction.
        /// </summary>
        public void AddExternalFile(string externalFile)
        {
            CheckDisposed();

            string hash;
            using (var fs = File.OpenRead(externalFile))
            {
                hash = GetHash(fs);
            }

            lock (archiveFiles)
            {
                // If the same content was already added with a stream, release it: the file is
                // external from now on.
                ArchiveFileInfo existing;
                if (archiveFiles.TryGetValue(hash, out existing) && existing.Stream != null)
                {
                    existing.Stream.Dispose();
                }

                // $ prefix indicates that the file is not in the archive and the path is a hash
                archiveFiles[hash] = new ArchiveFileInfo(null, ExternalPrefix + hash, hash);
                externalFiles[hash] = externalFile;
            }
        }

        /// <summary>
        /// Adds every file under <paramref name="sourceDirectory"/> in parallel; ".zip" and ".nupkg"
        /// files are expanded entry by entry.
        /// </summary>
        /// <param name="sourceDirectory">Directory to enumerate recursively.</param>
        /// <param name="progress">Receives one "Adding ..." report per file.</param>
        /// <param name="destinationDirectory">Optional prefix for the recorded destination paths.</param>
        public void AddDirectory(string sourceDirectory, IProgress<ProgressReport> progress, string destinationDirectory = null)
        {
            CheckDisposed();

            var sourceFiles = Directory.EnumerateFiles(sourceDirectory, "*", SearchOption.AllDirectories).ToArray();

            // Enumerated paths start with sourceDirectory; skip the separator after it only when
            // the caller did not already include one.
            bool endsWithSeparator = sourceDirectory.Length > 0 &&
                (sourceDirectory[sourceDirectory.Length - 1] == Path.DirectorySeparatorChar ||
                 sourceDirectory[sourceDirectory.Length - 1] == Path.AltDirectorySeparatorChar);
            int relativeStart = endsWithSeparator ? sourceDirectory.Length : sourceDirectory.Length + 1;

            int filesAdded = 0;
            sourceFiles.AsParallel().ForAll(sourceFile =>
            {
                string destinationPath = sourceFile.Substring(relativeStart);

                if (destinationDirectory != null)
                {
                    destinationPath = Path.Combine(destinationDirectory, destinationPath);
                }

                string extension = Path.GetExtension(sourceFile);

                if (ZipExtensions.Any(ze => ze.Equals(extension, StringComparison.OrdinalIgnoreCase)))
                {
                    AddZip(sourceFile, destinationPath);
                }
                else
                {
                    AddFile(sourceFile, destinationPath);
                }

                // the counter is shared by all PLINQ workers, so it must be incremented atomically
                progress.Report($"Adding {sourceDirectory}", Interlocked.Increment(ref filesAdded), sourceFiles.Length);
            });
        }

        /// <summary>Adds every entry of a zip file, recorded as "destinationZipFile::entryName".</summary>
        public void AddZip(string sourceZipFile, string destinationZipFile)
        {
            // The file stream has its own using so it is released even if the zip cannot be opened.
            using (var zipStream = File.OpenRead(sourceZipFile))
            using (var sourceArchive = new ZipArchive(zipStream, ZipArchiveMode.Read))
            {
                foreach (var entry in sourceArchive.Entries)
                {
                    // we can dispose this stream, if AddStream uses it, it will make a copy.
                    using (var stream = entry.Open())
                    {
                        string destinationPath = $"{destinationZipFile}::{entry.FullName}";
                        AddStream(stream, destinationPath);
                    }
                }
            }
        }

        /// <summary>Adds a single file from disk.</summary>
        public void AddFile(string sourceFilePath, string destinationPath)
        {
            // Read-only access is enough and also works for read-only or shared files.
            var stream = File.OpenRead(sourceFilePath);
            try
            {
                // on success the lifetime of this stream is managed by AddStream
                AddStream(stream, destinationPath);
            }
            catch
            {
                stream.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Records <paramref name="destinationPath"/> and takes ownership of <paramref name="stream"/>.
        /// A non-seekable stream is copied to a temporary file and disposed; a stream whose content
        /// is already known is disposed.
        /// </summary>
        public void AddStream(Stream stream, string destinationPath)
        {
            CheckDisposed();

            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }
            if (destinationPath == null)
            {
                throw new ArgumentNullException(nameof(destinationPath));
            }

            string hash = null;

            if (stream.CanSeek)
            {
                hash = GetHash(stream);
            }
            else
            {
                var copy = CreateTemporaryStream();
                try
                {
#if NET45
                    hash = CopyWithHash(stream, copy);
#else
                    stream.CopyTo(copy);
                    copy.Seek(0, SeekOrigin.Begin);
                    hash = GetHash(copy);
#endif
                }
                catch
                {
                    // do not leak the scratch file when reading the source fails
                    copy.Dispose();
                    throw;
                }
                stream.Dispose();
                stream = copy;
            }

            lock (archiveFiles)
            {
                destFiles.Add(new DestinationFileInfo(destinationPath, hash));

                // see if we already have this file in the archive/external
                ArchiveFileInfo existing = null;
                if (archiveFiles.TryGetValue(hash, out existing))
                {
                    // reduce memory pressure
                    if (!(stream is MemoryStream) && (existing.Stream is MemoryStream))
                    {
                        // dispose memory stream
                        existing.Stream.Dispose();
                        stream.Seek(0, SeekOrigin.Begin);
                        existing.Stream = stream;
                    }
                    else
                    {
                        // we already have a good stream, free this one.
                        stream.Dispose();
                    }
                }
                else
                {
                    // add a new entry
                    stream.Seek(0, SeekOrigin.Begin);
                    var archivePath = Path.Combine(hash, Path.GetFileName(destinationPath));

                    archiveFiles.Add(hash, new ArchiveFileInfo(stream, archivePath, hash));
                }
            }
        }

#if NET45
        private const int _DefaultCopyBufferSize = 81920;

        /// <summary>
        /// Calculates the hash while copying the file to avoid multiple reads
        /// </summary>
        public string CopyWithHash(Stream source, Stream destination)
        {
            CheckDisposed();

            byte[] buffer = new byte[_DefaultCopyBufferSize];
            int read;
            while ((read = source.Read(buffer, 0, buffer.Length)) != 0)
            {
                sha.Value.TransformBlock(buffer, 0, read, null, 0);
                destination.Write(buffer, 0, read);
            }
            sha.Value.TransformFinalBlock(buffer, 0, 0);
            var hash = sha.Value.Hash;

            // follow pattern in ComputeHash(stream) where it re-initializes after finishing.
            sha.Value.Initialize();

            return GetHashString(hash);
        }
#endif

        /// <summary>
        /// Returns the lowercase hex SHA-256 of <paramref name="stream"/>, read from its current
        /// position to the end.
        /// </summary>
        public string GetHash(Stream stream)
        {
            CheckDisposed();

            var hashBytes = sha.Value.ComputeHash(stream);

            return GetHashString(hashBytes);
        }

        // Formats the bytes as two lowercase hex digits each.
        private static string GetHashString(byte[] hashBytes)
        {
            StringBuilder builder = new StringBuilder(hashBytes.Length * 2);
            foreach (var b in hashBytes)
            {
                builder.AppendFormat("{0:x2}", b);
            }
            return builder.ToString();
        }

        /// <summary>Releases every content stream still held and the per-thread hashers.</summary>
        public void Dispose()
        {
            if (disposed)
            {
                return;
            }

            // mark first so CheckDisposed rejects further use and a second Dispose is a no-op
            disposed = true;

            foreach (var archiveFile in archiveFiles.Values)
            {
                if (archiveFile.Stream != null)
                {
                    archiveFile.Stream.Dispose();
                    archiveFile.Stream = null;
                }
            }

            if (sha != null)
            {
                // ThreadLocal.Dispose does not dispose the values it created
                foreach (var algorithm in sha.Values)
                {
                    if (algorithm != null)
                    {
                        algorithm.Dispose();
                    }
                }

                sha.Dispose();
                sha = null;
            }
        }

        private void CheckDisposed()
        {
            if (disposed)
            {
                throw new ObjectDisposedException(nameof(IndexedArchive));
            }
        }
    }
}

// Next patch entry: src/Microsoft.DotNet.Archive/ProgressReport.cs (new file, index 000000000..379378f88)
// Patch entries: src/Microsoft.DotNet.Archive/ProgressReport.cs and
// src/Microsoft.DotNet.Archive/ThreadLocalZipArchive.cs (new file, index 000000000..9659e8a86).
// The using directives of both files are merged here.
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

// ---- ProgressReport.cs ----
namespace Microsoft.DotNet.Archive
{
    /// <summary>A progress snapshot: the phase name and how many ticks of the total are done.</summary>
    public struct ProgressReport
    {
        public string Phase;
        public long Ticks;
        public long Total;
    }

    public static class ProgressReportExtensions
    {
        /// <summary>
        /// Builds a <see cref="ProgressReport"/> from the three values and reports it.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="progress"/> is null.</exception>
        public static void Report(this IProgress<ProgressReport> progress, string phase, long ticks, long total)
        {
            if (progress == null)
            {
                throw new ArgumentNullException(nameof(progress));
            }

            progress.Report(new ProgressReport()
            {
                Phase = phase,
                Ticks = ticks,
                Total = total
            });
        }
    }
}

// ---- ThreadLocalZipArchive.cs ----
namespace Microsoft.DotNet.Archive
{
    // wraps ThreadLocal<ZipArchive> and exposes Dispose semantics that dispose all archives
    class ThreadLocalZipArchive : IDisposable
    {
        private ThreadLocal<ZipArchive> _archive;
        private bool disposed = false;

        /// <summary>
        /// Creates a holder that opens <paramref name="archivePath"/> for reading once per thread.
        /// </summary>
        /// <param name="archivePath">Zip file each thread opens on first access.</param>
        /// <param name="local">
        /// Optional already-open archive to use for the constructing thread; it is disposed
        /// together with the archives this instance opens.
        /// </param>
        public ThreadLocalZipArchive(string archivePath, ZipArchive local = null)
        {
            // trackAllValues lets Dispose enumerate the archive opened by every thread
            _archive = new ThreadLocal<ZipArchive>(() => OpenArchive(archivePath), trackAllValues: true);

            if (local != null)
            {
                // reuse provided one for current thread
                _archive.Value = local;
            }
        }

        // Opens the zip read-only while allowing other handles to read, write or delete the file.
        private static ZipArchive OpenArchive(string archivePath)
        {
            var stream = File.Open(archivePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite | FileShare.Delete);
            try
            {
                return new ZipArchive(stream, ZipArchiveMode.Read);
            }
            catch
            {
                // the archive never took ownership of the stream, so release it here
                stream.Dispose();
                throw;
            }
        }

        /// <summary>The archive instance belonging to the calling thread.</summary>
        /// <exception cref="ObjectDisposedException">This instance has been disposed.</exception>
        public ZipArchive Archive
        {
            get
            {
                var archives = _archive;
                if (disposed || archives == null)
                {
                    throw new ObjectDisposedException(nameof(ThreadLocalZipArchive));
                }
                return archives.Value;
            }
        }

        /// <summary>Disposes the archive of every thread, then the ThreadLocal itself.</summary>
        public void Dispose()
        {
            if (disposed)
            {
                return;
            }

            disposed = true;

            var archives = _archive;
            _archive = null;

            if (archives == null)
            {
                return;
            }

            // dispose all archives
            foreach (var value in archives.Values)
            {
                if (value != null)
                {
                    value.Dispose();
                }
            }

            // dispose ThreadLocal
            archives.Dispose();
        }
    }
}

/*
 * Remaining patch entry (not C#), kept verbatim: src/Microsoft.DotNet.Archive/project.json
 *
 * diff --git a/src/Microsoft.DotNet.Archive/project.json b/src/Microsoft.DotNet.Archive/project.json
 * index 7356ba390..0dec6c409 100644
 * --- a/src/Microsoft.DotNet.Archive/project.json
 * +++ b/src/Microsoft.DotNet.Archive/project.json
 * @@ -8,7 +8,12 @@
 *      "NETStandard.Library": "1.6.0-rc3-24201-00"
 *    },
 *    "frameworks": {
 * -    "netstandard1.0": {}
 * +    "net45": {},
 * +    "netstandard1.3": {
 * +      "dependencies": {
 * +        "System.Linq.Parallel": "4.0.1-rc3-24201-00"
 * +      }
 * +    }
 *    },
 *    "scripts": {}
 *  }
 */