finally fixed v6 get/drop git status

After updating the worktree for an add/drop, update git's index, so git
status will not show the files as modified.

What actually happens is that the index update removes the inode
information from the index. The next git status (or similar) run
then has to do some work. It runs the clean filter.

So, this depends on the clean filter being reasonably fast and on git
not leaking memory when running it. Both problems were fixed in
a96972015d, but only for git 2.5. Anyone
using an older git will see very expensive git status after an add/drop.

This uses the same git update-index queue as other parts of git-annex, so
the actual index update is fairly efficient. Of course, updating the index
does still have some overhead. The annex.queuesize config will control how
often the index gets updated when working on a lot of files.

This is an imperfect workaround... Added several todos about new
problems this workaround causes. Still, this seems a lot better than the
old behavior.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2018-08-14 14:22:23 -04:00
parent 06fd4657db
commit 48e9e12961
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 76 additions and 102 deletions

View file

@ -24,6 +24,7 @@ module Annex.Content (
checkDiskSpace,
needMoreDiskSpace,
moveAnnex,
Restage(..),
populatePointerFile,
linkToAnnex,
linkFromAnnex,
@ -545,7 +546,7 @@ moveAnnex key src = ifM (checkSecureHashes key)
fs <- map (`fromTopFilePath` g)
<$> Database.Keys.getAssociatedFiles key
unless (null fs) $ do
mapM_ (populatePointerFile key dest) fs
mapM_ (populatePointerFile (Restage True) key dest) fs
Database.Keys.storeInodeCaches key (dest:fs)
)
storeindirect = storeobject =<< calcRepo (gitAnnexLocation key)
@ -586,14 +587,23 @@ checkSecureHashes key
, return True
)
populatePointerFile :: Key -> FilePath -> FilePath -> Annex ()
populatePointerFile k obj f = go =<< liftIO (isPointerFile f)
newtype Restage = Restage Bool
{- Populates a pointer file with the content of a key. -}
populatePointerFile :: Restage -> Key -> FilePath -> FilePath -> Annex ()
populatePointerFile (Restage restage) k obj f = go =<< liftIO (isPointerFile f)
where
go (Just k') | k == k' = do
destmode <- liftIO $ catchMaybeIO $ fileMode <$> getFileStatus f
liftIO $ nukeFile f
ifM (linkOrCopy k obj f destmode)
( thawContent f
( do
thawContent f
-- The pointer file is re-staged,
-- so git won't think it's been modified.
when restage $ do
pointersha <- hashPointerFile k
stagePointerFile f destmode pointersha
, liftIO $ writePointerFile f k destmode
)
go _ = return ()
@ -816,12 +826,6 @@ cleanObjectLoc key cleaner = do
<=< catchMaybeIO $ removeDirectory dir
{- Removes a key's file from .git/annex/objects/
-
- When a key has associated pointer files, they are checked for
- modifications, and if unmodified, are reset.
-
- In direct mode, deletes the associated files or files, and replaces
- them with symlinks.
-}
removeAnnex :: ContentRemovalLock -> Annex ()
removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect
@ -834,22 +838,33 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect
=<< Database.Keys.getAssociatedFiles key
Database.Keys.removeInodeCaches key
Direct.removeInodeCache key
-- Check associated pointer file for modifications, and reset if
-- it's unmodified.
resetpointer file = ifM (isUnmodified key file)
( do
mode <- liftIO $ catchMaybeIO $ fileMode <$> getFileStatus file
secureErase file
liftIO $ nukeFile file
liftIO $ writePointerFile file key mode
-- Can't delete the pointer file.
-- Re-stage the pointer, so git won't think it's
-- been modified.
pointersha <- hashPointerFile key
stagePointerFile file mode pointersha
-- Modified file, so leave it alone.
-- If it was a hard link to the annex object,
-- that object might have been frozen as part of the
-- removal process, so thaw it.
, void $ tryIO $ thawContent file
)
-- In direct mode, deletes the associated files or files, and replaces
-- them with symlinks.
removedirect fs = do
cache <- Direct.recordedInodeCache key
Direct.removeInodeCache key
mapM_ (resetfile cache) fs
resetfile cache f = whenM (Direct.sameInodeCache f cache) $ do
l <- calcRepo $ gitAnnexLink f key
secureErase f

View file

@ -140,14 +140,13 @@ ingestAdd' ld@(Just (LockedDown cfg source)) mk = do
return (Just k)
{- Ingests a locked down file into the annex. Does not update the working
- tree or the index.
-}
- tree or the index. -}
ingest :: Maybe LockedDown -> Maybe Key -> Annex (Maybe Key, Maybe InodeCache)
ingest = ingest' Nothing
ingest ld mk = ingest' Nothing ld mk (Restage True)
ingest' :: Maybe Backend -> Maybe LockedDown -> Maybe Key -> Annex (Maybe Key, Maybe InodeCache)
ingest' _ Nothing _ = return (Nothing, Nothing)
ingest' preferredbackend (Just (LockedDown cfg source)) mk = withTSDelta $ \delta -> do
ingest' :: Maybe Backend -> Maybe LockedDown -> Maybe Key -> Restage -> Annex (Maybe Key, Maybe InodeCache)
ingest' _ Nothing _ _ = return (Nothing, Nothing)
ingest' preferredbackend (Just (LockedDown cfg source)) mk restage = withTSDelta $ \delta -> do
k <- case mk of
Nothing -> do
backend <- maybe (chooseBackend $ keyFilename source) (return . Just) preferredbackend
@ -172,7 +171,7 @@ ingest' preferredbackend (Just (LockedDown cfg source)) mk = withTSDelta $ \delt
golocked key mcache s =
tryNonAsync (moveAnnex key $ contentLocation source) >>= \case
Right True -> do
populateAssociatedFiles key source
populateAssociatedFiles key source restage
success key mcache s
Right False -> giveup "failed to add content to annex"
Left e -> restoreFile (keyFilename source) key e
@ -186,7 +185,7 @@ ingest' preferredbackend (Just (LockedDown cfg source)) mk = withTSDelta $ \delt
linkToAnnex key (keyFilename source) (Just cache) >>= \case
LinkAnnexFailed -> failure "failed to link to annex"
_ -> do
finishIngestUnlocked' key source
finishIngestUnlocked' key source restage
success key (Just cache) s
gounlocked _ _ _ = failure "failed statting file"
@ -218,23 +217,23 @@ finishIngestDirect key source = do
finishIngestUnlocked :: Key -> KeySource -> Annex ()
finishIngestUnlocked key source = do
cleanCruft source
finishIngestUnlocked' key source
finishIngestUnlocked' key source (Restage True)
finishIngestUnlocked' :: Key -> KeySource -> Annex ()
finishIngestUnlocked' key source = do
finishIngestUnlocked' :: Key -> KeySource -> Restage -> Annex ()
finishIngestUnlocked' key source restage = do
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath (keyFilename source))
populateAssociatedFiles key source
populateAssociatedFiles key source restage
{- Copy to any other locations using the same key. -}
populateAssociatedFiles :: Key -> KeySource -> Annex ()
populateAssociatedFiles key source = do
populateAssociatedFiles :: Key -> KeySource -> Restage -> Annex ()
populateAssociatedFiles key source restage = do
obj <- calcRepo (gitAnnexLocation key)
g <- Annex.gitRepo
ingestedf <- flip fromTopFilePath g
<$> inRepo (toTopFilePath (keyFilename source))
afs <- map (`fromTopFilePath` g) <$> Database.Keys.getAssociatedFiles key
forM_ (filter (/= ingestedf) afs) $
populatePointerFile key obj
populatePointerFile restage key obj
cleanCruft :: KeySource -> Annex ()
cleanCruft source = when (contentLocation source /= keyFilename source) $