avoid running reconcileStaged when the index has not changed

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2018-08-22 13:04:12 -04:00
parent 65bd018f94
commit 18ecf41917
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 36 additions and 14 deletions

View file

@ -33,6 +33,7 @@ module Annex.Locations (
gitAnnexUnusedLog, gitAnnexUnusedLog,
gitAnnexKeysDb, gitAnnexKeysDb,
gitAnnexKeysDbLock, gitAnnexKeysDbLock,
gitAnnexKeysDbIndexCache,
gitAnnexFsckState, gitAnnexFsckState,
gitAnnexFsckDbDir, gitAnnexFsckDbDir,
gitAnnexFsckDbLock, gitAnnexFsckDbLock,
@ -285,6 +286,11 @@ gitAnnexKeysDb r = gitAnnexDir r </> "keys"
gitAnnexKeysDbLock :: Git.Repo -> FilePath gitAnnexKeysDbLock :: Git.Repo -> FilePath
gitAnnexKeysDbLock r = gitAnnexKeysDb r ++ ".lck" gitAnnexKeysDbLock r = gitAnnexKeysDb r ++ ".lck"
{- Path of the file that contains the stat (inode cache) of the last
 - index file that was reconciled with the keys database.
 -
 - It is located alongside the keys database, using the same path with
 - a ".cache" suffix. -}
gitAnnexKeysDbIndexCache :: Git.Repo -> FilePath
gitAnnexKeysDbIndexCache r = gitAnnexKeysDb r ++ ".cache"
{- .git/annex/fsck/uuid/ is used to store information about incremental {- .git/annex/fsck/uuid/ is used to store information about incremental
- fscks. -} - fscks. -}
gitAnnexFsckDir :: UUID -> Git.Repo -> FilePath gitAnnexFsckDir :: UUID -> Git.Repo -> FilePath

View file

@ -38,6 +38,7 @@ import Git
import Git.FilePath import Git.FilePath
import Git.Command import Git.Command
import Git.Types import Git.Types
import Git.Index
{- Runs an action that reads from the database. {- Runs an action that reads from the database.
- -
@ -190,18 +191,36 @@ removeInodeCaches = runWriterIO . SQL.removeInodeCaches . toIKey
- This needs to be run before querying the keys database so that - This needs to be run before querying the keys database so that
- information is consistent with the state of the repository. - information is consistent with the state of the repository.
- -
- TODO To avoid unnecessary work, the index file is statted, and if it's not - To avoid unnecessary work, the index file is statted, and if it's not
- changed since last time this was run, nothing is done. - changed since last time this was run, nothing is done.
-
- Note that this is run with a lock held, so only one process can be
- running this at a time.
-} -}
reconcileStaged :: SQL.WriteHandle -> Annex () reconcileStaged :: SQL.WriteHandle -> Annex ()
reconcileStaged h@(SQL.WriteHandle qh) = whenM versionUsesKeysDatabase $ do reconcileStaged h@(SQL.WriteHandle qh) = whenM versionUsesKeysDatabase $ do
(l, cleanup) <- inRepo $ pipeNullSplit diff gitindex <- inRepo currentIndexFile
changed <- go l False indexcache <- fromRepo gitAnnexKeysDbIndexCache
void $ liftIO cleanup withTSDelta (liftIO . genInodeCache gitindex) >>= \case
-- Flush database changes immediately so other processes can see them. Just cur ->
when changed $ liftIO (maybe Nothing readInodeCache <$> catchMaybeIO (readFile indexcache)) >>= \case
liftIO $ H.flushDbQueue qh Nothing -> go cur indexcache
Just prev -> ifM (compareInodeCaches prev cur)
( noop
, go cur indexcache
)
Nothing -> noop
where where
go cur indexcache = do
(l, cleanup) <- inRepo $ pipeNullSplit diff
changed <- procdiff l False
void $ liftIO cleanup
-- Flush database changes immediately
-- so other processes can see them.
when changed $
liftIO $ H.flushDbQueue qh
liftIO $ writeFile indexcache $ showInodeCache cur
diff = diff =
-- Avoid using external diff command, which would be slow. -- Avoid using external diff command, which would be slow.
-- (The -G option may make it be used otherwise.) -- (The -G option may make it be used otherwise.)
@ -227,7 +246,7 @@ reconcileStaged h@(SQL.WriteHandle qh) = whenM versionUsesKeysDatabase $ do
, Param "--no-ext-diff" , Param "--no-ext-diff"
] ]
go (info:file:rest) changed = case words info of procdiff (info:file:rest) changed = case words info of
((':':_srcmode):dstmode:_srcsha:dstsha:_change:[]) ((':':_srcmode):dstmode:_srcsha:dstsha:_change:[])
-- Only want files, not symlinks -- Only want files, not symlinks
| dstmode /= fmtTreeItemType TreeSymlink -> do | dstmode /= fmtTreeItemType TreeSymlink -> do
@ -238,8 +257,8 @@ reconcileStaged h@(SQL.WriteHandle qh) = whenM versionUsesKeysDatabase $ do
(toIKey k) (toIKey k)
(asTopFilePath file) (asTopFilePath file)
h h
go rest True procdiff rest True
| otherwise -> go rest changed | otherwise -> procdiff rest changed
_ -> return changed -- parse failed _ -> return changed -- parse failed
go _ changed = return changed procdiff _ changed = return changed

View file

@ -2,9 +2,6 @@ git-annex should use smudge/clean filters. v6 mode
### August sprint todo list ### August sprint todo list
* Avoid running reconcileStaged when the index has not changed since last
time.
* If `git mv` of an unlocked file is run at the same time as `git annex drop`, * If `git mv` of an unlocked file is run at the same time as `git annex drop`,
and when git-annex starts up, the mv has not happened yet, but once it and when git-annex starts up, the mv has not happened yet, but once it
wants to update the associated file to drop the content, the mv has wants to update the associated file to drop the content, the mv has