optimize smudge --clean of unmodified file
Usually, git won't run the clean filter when a file is unmodified. But when git checkout runs git annex smudge --update, it populates the pointer file and runs git update-index, which sees that the file has changed and runs git annex smudge --clean, which was checksumming the file unnecessarily as it re-ingested it. With annex.thin set, this is the difference between git checkout of a branch with a 1 GB file taking 30s and 0.1s. This commit was sponsored by Brett Eisenberg on Patreon.
This commit is contained in:
parent
daa259ec6a
commit
c28ca8294f
2 changed files with 40 additions and 15 deletions
|
@ -46,6 +46,7 @@ module Annex.Content (
|
||||||
staleKeysPrune,
|
staleKeysPrune,
|
||||||
pruneTmpWorkDirBefore,
|
pruneTmpWorkDirBefore,
|
||||||
isUnmodified,
|
isUnmodified,
|
||||||
|
isUnmodifiedCheap,
|
||||||
verifyKeyContent,
|
verifyKeyContent,
|
||||||
VerifyConfig(..),
|
VerifyConfig(..),
|
||||||
Verification(..),
|
Verification(..),
|
||||||
|
@ -746,25 +747,38 @@ isUnmodified :: Key -> FilePath -> Annex Bool
|
||||||
isUnmodified key f = go =<< geti
|
isUnmodified key f = go =<< geti
|
||||||
where
|
where
|
||||||
go Nothing = return False
|
go Nothing = return False
|
||||||
go (Just fc) = cheapcheck fc <||> expensivecheck fc
|
go (Just fc) = isUnmodifiedCheap' key fc <||> expensivecheck fc
|
||||||
cheapcheck fc = anyM (compareInodeCaches fc)
|
|
||||||
=<< Database.Keys.getInodeCaches key
|
|
||||||
expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key f)
|
expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key f)
|
||||||
( do
|
( do
|
||||||
|
liftIO $ print "content verified"
|
||||||
-- The file could have been modified while it was
|
-- The file could have been modified while it was
|
||||||
-- being verified. Detect that.
|
-- being verified. Detect that.
|
||||||
ifM (geti >>= maybe (return False) (compareInodeCaches fc))
|
ifM (geti >>= maybe (return False) (compareInodeCaches fc))
|
||||||
( do
|
( do
|
||||||
-- Update the InodeCache to avoid
|
-- Update the InodeCache to avoid
|
||||||
-- performing this expensive check again.
|
-- performing this expensive check again.
|
||||||
|
liftIO $ print "update inode cache"
|
||||||
Database.Keys.addInodeCaches key [fc]
|
Database.Keys.addInodeCaches key [fc]
|
||||||
return True
|
return True
|
||||||
, return False
|
, return False
|
||||||
)
|
)
|
||||||
, return False
|
, do
|
||||||
|
liftIO $ print "content not verified"
|
||||||
|
return False
|
||||||
)
|
)
|
||||||
geti = withTSDelta (liftIO . genInodeCache f)
|
geti = withTSDelta (liftIO . genInodeCache f)
|
||||||
|
|
||||||
|
{- Cheap check if a file contains the unmodified content of the key,
|
||||||
|
- only checking the InodeCache of the key.
|
||||||
|
-}
|
||||||
|
isUnmodifiedCheap :: Key -> FilePath -> Annex Bool
|
||||||
|
isUnmodifiedCheap key f = maybe (return False) (isUnmodifiedCheap' key)
|
||||||
|
=<< withTSDelta (liftIO . genInodeCache f)
|
||||||
|
|
||||||
|
isUnmodifiedCheap' :: Key -> InodeCache -> Annex Bool
|
||||||
|
isUnmodifiedCheap' key fc =
|
||||||
|
anyM (compareInodeCaches fc) =<< Database.Keys.getInodeCaches key
|
||||||
|
|
||||||
{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and
|
{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and
|
||||||
- returns the file it was moved to. -}
|
- returns the file it was moved to. -}
|
||||||
moveBad :: Key -> Annex FilePath
|
moveBad :: Key -> Annex FilePath
|
||||||
|
|
|
@ -95,6 +95,19 @@ clean file = do
|
||||||
if Git.BuildVersion.older "2.5"
|
if Git.BuildVersion.older "2.5"
|
||||||
then B.length b `seq` return ()
|
then B.length b `seq` return ()
|
||||||
else liftIO $ hClose stdin
|
else liftIO $ hClose stdin
|
||||||
|
|
||||||
|
-- Optimization when the file is already annexed
|
||||||
|
-- and is unmodified.
|
||||||
|
case oldkey of
|
||||||
|
Nothing -> ingest oldkey
|
||||||
|
Just ko -> ifM (isUnmodifiedCheap ko file)
|
||||||
|
( liftIO $ emitPointer ko
|
||||||
|
, ingest oldkey
|
||||||
|
)
|
||||||
|
, liftIO $ B.hPut stdout b
|
||||||
|
)
|
||||||
|
|
||||||
|
ingest oldkey = do
|
||||||
-- Look up the backend that was used for this file
|
-- Look up the backend that was used for this file
|
||||||
-- before, so that when git re-cleans a file its
|
-- before, so that when git re-cleans a file its
|
||||||
-- backend does not change.
|
-- backend does not change.
|
||||||
|
@ -106,8 +119,6 @@ clean file = do
|
||||||
=<< postingest
|
=<< postingest
|
||||||
=<< (\ld -> ingest' oldbackend ld Nothing norestage)
|
=<< (\ld -> ingest' oldbackend ld Nothing norestage)
|
||||||
=<< lockDown cfg file
|
=<< lockDown cfg file
|
||||||
, liftIO $ B.hPut stdout b
|
|
||||||
)
|
|
||||||
|
|
||||||
postingest (Just k, _) = do
|
postingest (Just k, _) = do
|
||||||
logStatus k InfoPresent
|
logStatus k InfoPresent
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue