optimize smudge --clean of unmodified file

Usually, git won't run the clean filter when a file is unmodified. But, when
git checkout runs git annex smudge --update, it populates the pointer file and
runs git update-index, which sees the file has changed and runs
git annex smudge --clean, which was checksumming the file unnecessarily
as it re-ingested it.

With annex.thin set, this is the difference between git checkout of a
branch with a 1 gb file taking 30s and 0.1s.

This commit was sponsored by Brett Eisenberg on Patreon.
This commit is contained in:
Joey Hess 2018-10-25 16:38:04 -04:00
parent daa259ec6a
commit c28ca8294f
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 40 additions and 15 deletions

View file

@ -95,19 +95,30 @@ clean file = do
if Git.BuildVersion.older "2.5"
then B.length b `seq` return ()
else liftIO $ hClose stdin
-- Look up the backend that was used for this file
-- before, so that when git re-cleans a file its
-- backend does not change.
let oldbackend = maybe Nothing (maybeLookupBackendVariety . keyVariety) oldkey
-- Can't restage associated files because git add
-- runs this and has the index locked.
let norestage = Restage False
liftIO . emitPointer
=<< postingest
=<< (\ld -> ingest' oldbackend ld Nothing norestage)
=<< lockDown cfg file
-- Optimization when the file is already annexed
-- and is unmodified.
case oldkey of
Nothing -> ingest oldkey
Just ko -> ifM (isUnmodifiedCheap ko file)
( liftIO $ emitPointer ko
, ingest oldkey
)
, liftIO $ B.hPut stdout b
)
-- Full ingest path for the file being cleaned: lock the file down,
-- pass it to ingest' with the backend derived from oldkey (when
-- there is one), run the result through postingest, and emit the
-- pointer for what postingest returns.
--
-- oldkey is a Maybe; Nothing means no previous backend is passed on.
ingest oldkey = do
-- Look up the backend that was used for this file
-- before, so that when git re-cleans a file its
-- backend does not change.
let oldbackend = maybe Nothing (maybeLookupBackendVariety . keyVariety) oldkey
-- Can't restage associated files because git add
-- runs this and has the index locked.
let norestage = Restage False
-- The =<< chain below runs bottom-to-top: lockDown first, then
-- ingest', then postingest, and finally emitPointer.
liftIO . emitPointer
=<< postingest
=<< (\ld -> ingest' oldbackend ld Nothing norestage)
=<< lockDown cfg file
postingest (Just k, _) = do
logStatus k InfoPresent