From 18ecf41917aa41a0e69214af5a96cc0a26576f7a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 22 Aug 2018 13:04:12 -0400 Subject: [PATCH] avoid running reconcileStaged when the index has not changed This commit was supported by the NSF-funded DataLad project. --- Annex/Locations.hs | 6 ++++++ Database/Keys.hs | 41 ++++++++++++++++++++++++++++++----------- doc/todo/smudge.mdwn | 3 --- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/Annex/Locations.hs b/Annex/Locations.hs index bb45d8a3e0..7e52dc6115 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -33,6 +33,7 @@ module Annex.Locations ( gitAnnexUnusedLog, gitAnnexKeysDb, gitAnnexKeysDbLock, + gitAnnexKeysDbIndexCache, gitAnnexFsckState, gitAnnexFsckDbDir, gitAnnexFsckDbLock, @@ -285,6 +286,11 @@ gitAnnexKeysDb r = gitAnnexDir r "keys" gitAnnexKeysDbLock :: Git.Repo -> FilePath gitAnnexKeysDbLock r = gitAnnexKeysDb r ++ ".lck" +{- Contains the stat of the last index file that was + - reconciled with the keys database. -} +gitAnnexKeysDbIndexCache :: Git.Repo -> FilePath +gitAnnexKeysDbIndexCache r = gitAnnexKeysDb r ++ ".cache" + {- .git/annex/fsck/uuid/ is used to store information about incremental - fscks. -} gitAnnexFsckDir :: UUID -> Git.Repo -> FilePath diff --git a/Database/Keys.hs b/Database/Keys.hs index ca05661cef..766a61c781 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -38,6 +38,7 @@ import Git import Git.FilePath import Git.Command import Git.Types +import Git.Index {- Runs an action that reads from the database. - @@ -190,18 +191,36 @@ removeInodeCaches = runWriterIO . SQL.removeInodeCaches . toIKey - This needs to be run before querying the keys database so that - information is consistent with the state of the repository. - - - TODO To avoid unncessary work, the index file is statted, and if it's not + - To avoid unnecessary work, the index file is statted, and if it's not - changed since last time this was run, nothing is done. 
+ - + - Note that this is run with a lock held, so only one process can be + - running this at a time. -} reconcileStaged :: SQL.WriteHandle -> Annex () reconcileStaged h@(SQL.WriteHandle qh) = whenM versionUsesKeysDatabase $ do - (l, cleanup) <- inRepo $ pipeNullSplit diff - changed <- go l False - void $ liftIO cleanup - -- Flush database changes immediately so other processes can see them. - when changed $ - liftIO $ H.flushDbQueue qh + gitindex <- inRepo currentIndexFile + indexcache <- fromRepo gitAnnexKeysDbIndexCache + withTSDelta (liftIO . genInodeCache gitindex) >>= \case + Just cur -> + liftIO (maybe Nothing readInodeCache <$> catchMaybeIO (readFile indexcache)) >>= \case + Nothing -> go cur indexcache + Just prev -> ifM (compareInodeCaches prev cur) + ( noop + , go cur indexcache + ) + Nothing -> noop where + go cur indexcache = do + (l, cleanup) <- inRepo $ pipeNullSplit diff + changed <- procdiff l False + void $ liftIO cleanup + -- Flush database changes immediately + -- so other processes can see them. + when changed $ + liftIO $ H.flushDbQueue qh + liftIO $ writeFile indexcache $ showInodeCache cur + diff = -- Avoid using external diff command, which would be slow. -- (The -G option may make it be used otherwise.) 
@@ -227,7 +246,7 @@ reconcileStaged h@(SQL.WriteHandle qh) = whenM versionUsesKeysDatabase $ do , Param "--no-ext-diff" ] - go (info:file:rest) changed = case words info of + procdiff (info:file:rest) changed = case words info of ((':':_srcmode):dstmode:_srcsha:dstsha:_change:[]) -- Only want files, not symlinks | dstmode /= fmtTreeItemType TreeSymlink -> do @@ -238,8 +257,8 @@ reconcileStaged h@(SQL.WriteHandle qh) = whenM versionUsesKeysDatabase $ do (toIKey k) (asTopFilePath file) h - go rest True - | otherwise -> go rest changed + procdiff rest True + | otherwise -> procdiff rest changed _ -> return changed -- parse failed - go _ changed = return changed + procdiff _ changed = return changed diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index b00a0e01d7..6199122f9d 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -2,9 +2,6 @@ git-annex should use smudge/clean filters. v6 mode ### August sprint todo list -* Avoid running reconcileStaged when the index has not changed since last - time. - * If `git mv` of an unlocked file is run at the same time as `git annex drop`, and when git-annex starts up, the mv has not happened yet, but once it wants to update the associated file to drop the content, the mv has