optimize smudge --clean of unmodified file
Usually, git won't run the clean filter when a file is unmodified. But when git checkout runs git annex smudge --update, it populates the pointer and runs git update-index, which sees the file has changed and runs git annex smudge --clean, which was checksumming the file unnecessarily as it re-ingested it. With annex.thin set, this is the difference between git checkout of a branch with a 1 GB file taking 30s and 0.1s. This commit was sponsored by Brett Eisenberg on Patreon.
This commit is contained in:
parent
daa259ec6a
commit
c28ca8294f
2 changed files with 40 additions and 15 deletions
|
@ -46,6 +46,7 @@ module Annex.Content (
|
|||
staleKeysPrune,
|
||||
pruneTmpWorkDirBefore,
|
||||
isUnmodified,
|
||||
isUnmodifiedCheap,
|
||||
verifyKeyContent,
|
||||
VerifyConfig(..),
|
||||
Verification(..),
|
||||
|
@ -746,25 +747,38 @@ isUnmodified :: Key -> FilePath -> Annex Bool
|
|||
isUnmodified key f = go =<< geti
|
||||
where
|
||||
go Nothing = return False
|
||||
go (Just fc) = cheapcheck fc <||> expensivecheck fc
|
||||
cheapcheck fc = anyM (compareInodeCaches fc)
|
||||
=<< Database.Keys.getInodeCaches key
|
||||
go (Just fc) = isUnmodifiedCheap' key fc <||> expensivecheck fc
|
||||
expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key f)
|
||||
( do
|
||||
liftIO $ print "content verified"
|
||||
-- The file could have been modified while it was
|
||||
-- being verified. Detect that.
|
||||
ifM (geti >>= maybe (return False) (compareInodeCaches fc))
|
||||
( do
|
||||
-- Update the InodeCache to avoid
|
||||
-- performing this expensive check again.
|
||||
liftIO $ print "update inode cache"
|
||||
Database.Keys.addInodeCaches key [fc]
|
||||
return True
|
||||
, return False
|
||||
)
|
||||
, return False
|
||||
, do
|
||||
liftIO $ print "content not verified"
|
||||
return False
|
||||
)
|
||||
geti = withTSDelta (liftIO . genInodeCache f)
|
||||
|
||||
{- Cheap check if a file contains the unmodified content of the key,
|
||||
- only checking the InodeCache of the key.
|
||||
-}
|
||||
isUnmodifiedCheap :: Key -> FilePath -> Annex Bool
|
||||
isUnmodifiedCheap key f = maybe (return False) (isUnmodifiedCheap' key)
|
||||
=<< withTSDelta (liftIO . genInodeCache f)
|
||||
|
||||
isUnmodifiedCheap' :: Key -> InodeCache -> Annex Bool
|
||||
isUnmodifiedCheap' key fc =
|
||||
anyM (compareInodeCaches fc) =<< Database.Keys.getInodeCaches key
|
||||
|
||||
{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and
|
||||
- returns the file it was moved to. -}
|
||||
moveBad :: Key -> Annex FilePath
|
||||
|
|
|
@ -95,19 +95,30 @@ clean file = do
|
|||
if Git.BuildVersion.older "2.5"
|
||||
then B.length b `seq` return ()
|
||||
else liftIO $ hClose stdin
|
||||
-- Look up the backend that was used for this file
|
||||
-- before, so that when git re-cleans a file its
|
||||
-- backend does not change.
|
||||
let oldbackend = maybe Nothing (maybeLookupBackendVariety . keyVariety) oldkey
|
||||
-- Can't restage associated files because git add
|
||||
-- runs this and has the index locked.
|
||||
let norestage = Restage False
|
||||
liftIO . emitPointer
|
||||
=<< postingest
|
||||
=<< (\ld -> ingest' oldbackend ld Nothing norestage)
|
||||
=<< lockDown cfg file
|
||||
|
||||
-- Optimization when the file is already annexed
|
||||
-- and is unmodified.
|
||||
case oldkey of
|
||||
Nothing -> ingest oldkey
|
||||
Just ko -> ifM (isUnmodifiedCheap ko file)
|
||||
( liftIO $ emitPointer ko
|
||||
, ingest oldkey
|
||||
)
|
||||
, liftIO $ B.hPut stdout b
|
||||
)
|
||||
|
||||
ingest oldkey = do
|
||||
-- Look up the backend that was used for this file
|
||||
-- before, so that when git re-cleans a file its
|
||||
-- backend does not change.
|
||||
let oldbackend = maybe Nothing (maybeLookupBackendVariety . keyVariety) oldkey
|
||||
-- Can't restage associated files because git add
|
||||
-- runs this and has the index locked.
|
||||
let norestage = Restage False
|
||||
liftIO . emitPointer
|
||||
=<< postingest
|
||||
=<< (\ld -> ingest' oldbackend ld Nothing norestage)
|
||||
=<< lockDown cfg file
|
||||
|
||||
postingest (Just k, _) = do
|
||||
logStatus k InfoPresent
|
||||
|
|
Loading…
Reference in a new issue