avoid running reconcileStaged when the index has not changed

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2018-08-22 13:04:12 -04:00
parent 65bd018f94
commit 18ecf41917
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 36 additions and 14 deletions

View file

@ -33,6 +33,7 @@ module Annex.Locations (
gitAnnexUnusedLog,
gitAnnexKeysDb,
gitAnnexKeysDbLock,
gitAnnexKeysDbIndexCache,
gitAnnexFsckState,
gitAnnexFsckDbDir,
gitAnnexFsckDbLock,
@ -285,6 +286,11 @@ gitAnnexKeysDb r = gitAnnexDir r </> "keys"
gitAnnexKeysDbLock :: Git.Repo -> FilePath
gitAnnexKeysDbLock r = gitAnnexKeysDb r ++ ".lck"
{- Path of the file that contains the stat of the last index file that
- was reconciled with the keys database. The path is the keys database
- path with ".cache" appended. -}
gitAnnexKeysDbIndexCache :: Git.Repo -> FilePath
gitAnnexKeysDbIndexCache r = gitAnnexKeysDb r ++ ".cache"
{- .git/annex/fsck/uuid/ is used to store information about incremental
- fscks. -}
gitAnnexFsckDir :: UUID -> Git.Repo -> FilePath

View file

@ -38,6 +38,7 @@ import Git
import Git.FilePath
import Git.Command
import Git.Types
import Git.Index
{- Runs an action that reads from the database.
-
@ -190,18 +191,36 @@ removeInodeCaches = runWriterIO . SQL.removeInodeCaches . toIKey
- This needs to be run before querying the keys database so that
- information is consistent with the state of the repository.
-
- TODO To avoid unnecessary work, the index file is statted, and if it's not
- To avoid unnecessary work, the index file is statted, and if it's not
- changed since last time this was run, nothing is done.
-
- Note that this is run with a lock held, so only one process can be
- running this at a time.
-}
reconcileStaged :: SQL.WriteHandle -> Annex ()
reconcileStaged h@(SQL.WriteHandle qh) = whenM versionUsesKeysDatabase $ do
(l, cleanup) <- inRepo $ pipeNullSplit diff
changed <- go l False
void $ liftIO cleanup
-- Flush database changes immediately so other processes can see them.
when changed $
liftIO $ H.flushDbQueue qh
gitindex <- inRepo currentIndexFile
indexcache <- fromRepo gitAnnexKeysDbIndexCache
withTSDelta (liftIO . genInodeCache gitindex) >>= \case
Just cur ->
liftIO (maybe Nothing readInodeCache <$> catchMaybeIO (readFile indexcache)) >>= \case
Nothing -> go cur indexcache
Just prev -> ifM (compareInodeCaches prev cur)
( noop
, go cur indexcache
)
Nothing -> noop
where
go cur indexcache = do
(l, cleanup) <- inRepo $ pipeNullSplit diff
changed <- procdiff l False
void $ liftIO cleanup
-- Flush database changes immediately
-- so other processes can see them.
when changed $
liftIO $ H.flushDbQueue qh
liftIO $ writeFile indexcache $ showInodeCache cur
diff =
-- Avoid using external diff command, which would be slow.
-- (The -G option may make it be used otherwise.)
@ -227,7 +246,7 @@ reconcileStaged h@(SQL.WriteHandle qh) = whenM versionUsesKeysDatabase $ do
, Param "--no-ext-diff"
]
go (info:file:rest) changed = case words info of
procdiff (info:file:rest) changed = case words info of
((':':_srcmode):dstmode:_srcsha:dstsha:_change:[])
-- Only want files, not symlinks
| dstmode /= fmtTreeItemType TreeSymlink -> do
@ -238,8 +257,8 @@ reconcileStaged h@(SQL.WriteHandle qh) = whenM versionUsesKeysDatabase $ do
(toIKey k)
(asTopFilePath file)
h
go rest True
| otherwise -> go rest changed
procdiff rest True
| otherwise -> procdiff rest changed
_ -> return changed -- parse failed
go _ changed = return changed
procdiff _ changed = return changed

View file

@ -2,9 +2,6 @@ git-annex should use smudge/clean filters. v6 mode
### August sprint todo list
* Avoid running reconcileStaged when the index has not changed since last
time.
* If `git mv` of an unlocked file is run at the same time as `git annex drop`,
and when git-annex starts up, the mv has not happened yet, but once it
wants to update the associated file to drop the content, the mv has