From c28ca8294f7695c77e5f03762171e829de5d6ea4 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Thu, 25 Oct 2018 16:38:04 -0400
Subject: [PATCH] optimize smudge --clean of unmodified file

Usually, git won't run clean filter when a file is unmodified. But, when
git checkout runs git annex smudge --update, it populates the pointer
file and runs git update-index, which sees the file has changed and runs
git annex smudge --clean, which was checksumming the file unnecessarily
as it re-ingested it.

With annex.thin set, this is the difference between git checkout of a
branch with a 1 gb file taking 30s and 0.1s.

This commit was sponsored by Brett Eisenberg on Patreon.
---
 Annex/Content.hs  | 18 +++++++++++++++---
 Command/Smudge.hs | 33 ++++++++++++++++++++++-----------
 2 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/Annex/Content.hs b/Annex/Content.hs
index e814b0c636..e7574d87be 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -46,6 +46,7 @@ module Annex.Content (
 	staleKeysPrune,
 	pruneTmpWorkDirBefore,
 	isUnmodified,
+	isUnmodifiedCheap,
 	verifyKeyContent,
 	VerifyConfig(..),
 	Verification(..),
@@ -746,9 +747,7 @@ isUnmodified :: Key -> FilePath -> Annex Bool
 isUnmodified key f = go =<< geti
   where
 	go Nothing = return False
-	go (Just fc) = cheapcheck fc <||> expensivecheck fc
-	cheapcheck fc = anyM (compareInodeCaches fc)
-		=<< Database.Keys.getInodeCaches key
+	go (Just fc) = isUnmodifiedCheap' key fc <||> expensivecheck fc
 	expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key f)
 		( do
 			-- The file could have been modified while it was
@@ -765,6 +764,19 @@ isUnmodified key f = go =<< geti
 		)
 	geti = withTSDelta (liftIO . genInodeCache f)
 
+{- Cheap check if a file contains the unmodified content of the key,
+ - only checking the InodeCache of the key.
+ -}
+isUnmodifiedCheap :: Key -> FilePath -> Annex Bool
+isUnmodifiedCheap key f = maybe (return False) (isUnmodifiedCheap' key)
+	=<< withTSDelta (liftIO . genInodeCache f)
+
+{- Checks if the given InodeCache compares equal to any of the
+ - InodeCaches that Database.Keys returns for the key. -}
+isUnmodifiedCheap' :: Key -> InodeCache -> Annex Bool
+isUnmodifiedCheap' key fc =
+	anyM (compareInodeCaches fc) =<< Database.Keys.getInodeCaches key
+
 {- Moves a key out of .git/annex/objects/ into .git/annex/bad, and
  - returns the file it was moved to. -}
 moveBad :: Key -> Annex FilePath
diff --git a/Command/Smudge.hs b/Command/Smudge.hs
index 68488cc796..af80f1122a 100644
--- a/Command/Smudge.hs
+++ b/Command/Smudge.hs
@@ -95,19 +95,30 @@ clean file = do
 			if Git.BuildVersion.older "2.5"
 				then B.length b `seq` return ()
 				else liftIO $ hClose stdin
-			-- Look up the backend that was used for this file
-			-- before, so that when git re-cleans a file its
-			-- backend does not change.
-			let oldbackend = maybe Nothing (maybeLookupBackendVariety . keyVariety) oldkey
-			-- Can't restage associated files because git add
-			-- runs this and has the index locked.
-			let norestage = Restage False
-			liftIO . emitPointer
-				=<< postingest
-				=<< (\ld -> ingest' oldbackend ld Nothing norestage)
-				=<< lockDown cfg file
+
+			-- Optimization when the file is already annexed
+			-- and is unmodified.
+			case oldkey of
+				Nothing -> ingest oldkey
+				Just ko -> ifM (isUnmodifiedCheap ko file)
+					( liftIO $ emitPointer ko
+					, ingest oldkey
+					)
 		, liftIO $ B.hPut stdout b
 		)
+
+	ingest oldkey = do
+		-- Look up the backend that was used for this file
+		-- before, so that when git re-cleans a file its
+		-- backend does not change.
+		let oldbackend = maybe Nothing (maybeLookupBackendVariety . keyVariety) oldkey
+		-- Can't restage associated files because git add
+		-- runs this and has the index locked.
+		let norestage = Restage False
+		liftIO . emitPointer
+			=<< postingest
+			=<< (\ld -> ingest' oldbackend ld Nothing norestage)
+			=<< lockDown cfg file
 
 	postingest (Just k, _) = do
 		logStatus k InfoPresent