diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs index d7e423c6e8..95e0d18e2b 100644 --- a/Annex/WorkTree.hs +++ b/Annex/WorkTree.hs @@ -53,15 +53,6 @@ whenAnnexed a file = ifAnnexed file (a file) (return Nothing) ifAnnexed :: RawFilePath -> (Key -> Annex a) -> Annex a -> Annex a ifAnnexed file yes no = maybe no yes =<< lookupKey file -{- Find all annexed files and update the keys database for them. - - - - Normally the keys database is updated incrementally when it's being - - opened, and changes are noticed. Calling this explicitly allows - - running the update at an earlier point. - - - - All that needs to be done is to open the database, - - that will result in Database.Keys.reconcileStaged - - running, and doing the work. - -} +{- Find all annexed files and update the keys database for them. -} scanAnnexedFiles :: Annex () -scanAnnexedFiles = Database.Keys.runWriter (const noop) +scanAnnexedFiles = Database.Keys.updateDatabase diff --git a/CHANGELOG b/CHANGELOG index 56f2c781fe..6c045d7609 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,7 @@ git-annex (10.20221004) UNRELEASED; urgency=medium + * Doubled the speed of git-annex drop when operating on many files, + and of git-annex get when operating on many tiny files. * trust, untrust, semitrust, dead: Fix behavior when provided with multiple repositories to operate on. * trust, untrust, semitrust, dead: When provided with no parameters, diff --git a/Database/Keys.hs b/Database/Keys.hs index 6b4f3e4782..45f8d2f851 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -25,11 +25,13 @@ module Database.Keys ( removeInodeCache, isInodeKnown, runWriter, + updateDatabase, ) where import qualified Database.Keys.SQL as SQL import Database.Types import Database.Keys.Handle +import Database.Keys.Tables import qualified Database.Queue as H import Database.Init import Annex.Locations @@ -64,49 +66,53 @@ import Control.Concurrent.Async - If the database is already open, any writes are flushed to it, to ensure - consistency. - - - Any queued writes will be flushed before the read. + - Any queued writes to the table will be flushed before the read. -} -runReader :: Monoid v => (SQL.ReadHandle -> Annex v) -> Annex v -runReader a = do +runReader :: Monoid v => DbTable -> (SQL.ReadHandle -> Annex v) -> Annex v +runReader t a = do h <- Annex.getRead Annex.keysdbhandle withDbState h go where go DbUnavailable = return (mempty, DbUnavailable) - go st@(DbOpen qh) = do - liftIO $ H.flushDbQueue qh + go (DbOpen (qh, tableschanged)) = do + tableschanged' <- if isDbTableChanged tableschanged t + then do + liftIO $ H.flushDbQueue qh + return mempty + else return tableschanged v <- a (SQL.ReadHandle qh) - return (v, st) + return (v, DbOpen (qh, tableschanged')) go DbClosed = do - st' <- openDb False DbClosed - v <- case st' of - (DbOpen qh) -> a (SQL.ReadHandle qh) + st <- openDb False DbClosed + v <- case st of + (DbOpen (qh, _)) -> a (SQL.ReadHandle qh) _ -> return mempty - return (v, st') + return (v, st) -runReaderIO :: Monoid v => (SQL.ReadHandle -> IO v) -> Annex v -runReaderIO a = runReader (liftIO . a) +runReaderIO :: Monoid v => DbTable -> (SQL.ReadHandle -> IO v) -> Annex v +runReaderIO t a = runReader t (liftIO . a) {- Runs an action that writes to the database. Typically this is used to - queue changes, which will be flushed at a later point. - - The database is created if it doesn't exist yet. -} -runWriter :: (SQL.WriteHandle -> Annex ()) -> Annex () -runWriter a = do +runWriter :: DbTable -> (SQL.WriteHandle -> Annex ()) -> Annex () +runWriter t a = do h <- Annex.getRead Annex.keysdbhandle withDbState h go where - go st@(DbOpen qh) = do + go (DbOpen (qh, tableschanged)) = do v <- a (SQL.WriteHandle qh) - return (v, st) + return (v, DbOpen (qh, addDbTable tableschanged t)) go st = do st' <- openDb True st v <- case st' of - DbOpen qh -> a (SQL.WriteHandle qh) + DbOpen (qh, _) -> a (SQL.WriteHandle qh) _ -> error "internal" return (v, st') -runWriterIO :: (SQL.WriteHandle -> IO ()) -> Annex () -runWriterIO a = runWriter (liftIO . a) +runWriterIO :: DbTable -> (SQL.WriteHandle -> IO ()) -> Annex () +runWriterIO t a = runWriter t (liftIO . a) {- Opens the database, creating it if it doesn't exist yet. - @@ -139,8 +145,8 @@ openDb forwrite _ = do open db = do qh <- liftIO $ H.openDbQueue db SQL.containedTable - reconcileStaged qh - return $ DbOpen qh + tc <- reconcileStaged qh + return $ DbOpen (qh, tc) {- Closes the database if it was open. Any writes will be flushed to it. - @@ -155,12 +161,13 @@ flushDb :: Annex () flushDb = liftIO . flushDbQueue =<< Annex.getRead Annex.keysdbhandle addAssociatedFile :: Key -> TopFilePath -> Annex () -addAssociatedFile k f = runWriterIO $ SQL.addAssociatedFile k f +addAssociatedFile k f = runWriterIO AssociatedTable $ SQL.addAssociatedFile k f {- Note that the files returned were once associated with the key, but - some of them may not be any longer. -} getAssociatedFiles :: Key -> Annex [TopFilePath] -getAssociatedFiles k = emptyWhenBare $ runReaderIO $ SQL.getAssociatedFiles k +getAssociatedFiles k = emptyWhenBare $ runReaderIO AssociatedTable $ + SQL.getAssociatedFiles k {- Queries for associated files never return anything when in a bare - repository, since without a work tree there can be no associated files. @@ -186,10 +193,12 @@ getAssociatedFilesIncluding afile k = emptyWhenBare $ do {- Gets any keys that are on record as having a particular associated file. - (Should be one or none but the database doesn't enforce that.) -} getAssociatedKey :: TopFilePath -> Annex [Key] -getAssociatedKey f = emptyWhenBare $ runReaderIO $ SQL.getAssociatedKey f +getAssociatedKey f = emptyWhenBare $ runReaderIO AssociatedTable $ + SQL.getAssociatedKey f removeAssociatedFile :: Key -> TopFilePath -> Annex () -removeAssociatedFile k = runWriterIO . SQL.removeAssociatedFile k +removeAssociatedFile k = runWriterIO AssociatedTable . + SQL.removeAssociatedFile k {- Stats the files, and stores their InodeCaches. -} storeInodeCaches :: Key -> [RawFilePath] -> Annex () @@ -198,7 +207,7 @@ storeInodeCaches k fs = withTSDelta $ \d -> =<< liftIO (mapM (\f -> genInodeCache f d) fs) addInodeCaches :: Key -> [InodeCache] -> Annex () -addInodeCaches k is = runWriterIO $ SQL.addInodeCaches k is +addInodeCaches k is = runWriterIO ContentTable $ SQL.addInodeCaches k is {- A key may have multiple InodeCaches; one for the annex object, and one - for each pointer file that is a copy of it. @@ -210,18 +219,19 @@ addInodeCaches k is = runWriterIO $ SQL.addInodeCaches k is - for pointer files, but none recorded for the annex object. -} getInodeCaches :: Key -> Annex [InodeCache] -getInodeCaches = runReaderIO . SQL.getInodeCaches +getInodeCaches = runReaderIO ContentTable . SQL.getInodeCaches {- Remove all inodes cached for a key. -} removeInodeCaches :: Key -> Annex () -removeInodeCaches = runWriterIO . SQL.removeInodeCaches +removeInodeCaches = runWriterIO ContentTable . SQL.removeInodeCaches {- Remove cached inodes, for any key. -} removeInodeCache :: InodeCache -> Annex () -removeInodeCache = runWriterIO . SQL.removeInodeCache +removeInodeCache = runWriterIO ContentTable . SQL.removeInodeCache isInodeKnown :: InodeCache -> SentinalStatus -> Annex Bool -isInodeKnown i s = or <$> runReaderIO ((:[]) <$$> SQL.isInodeKnown i s) +isInodeKnown i s = or <$> runReaderIO ContentTable + ((:[]) <$$> SQL.isInodeKnown i s) {- Looks at staged changes to annexed files, and updates the keys database, - so that its information is consistent with the state of the repository. @@ -250,18 +260,21 @@ isInodeKnown i s = or <$> runReaderIO ((:[]) <$$> SQL.isInodeKnown i s) - So when using getAssociatedFiles, have to make sure the file still - is an associated file. -} -reconcileStaged :: H.DbQueue -> Annex () -reconcileStaged qh = unlessM (Git.Config.isBare <$> gitRepo) $ do - gitindex <- inRepo currentIndexFile - indexcache <- fromRawFilePath <$> calcRepo' gitAnnexKeysDbIndexCache - withTSDelta (liftIO . genInodeCache gitindex) >>= \case - Just cur -> readindexcache indexcache >>= \case - Nothing -> go cur indexcache =<< getindextree - Just prev -> ifM (compareInodeCaches prev cur) - ( noop - , go cur indexcache =<< getindextree - ) - Nothing -> noop +reconcileStaged :: H.DbQueue -> Annex DbTablesChanged +reconcileStaged qh = ifM (Git.Config.isBare <$> gitRepo) + ( return mempty + , do + gitindex <- inRepo currentIndexFile + indexcache <- fromRawFilePath <$> calcRepo' gitAnnexKeysDbIndexCache + withTSDelta (liftIO . genInodeCache gitindex) >>= \case + Just cur -> readindexcache indexcache >>= \case + Nothing -> go cur indexcache =<< getindextree + Just prev -> ifM (compareInodeCaches prev cur) + ( return mempty + , go cur indexcache =<< getindextree + ) + Nothing -> return mempty + ) where lastindexref = Ref "refs/annex/last-index" @@ -286,6 +299,7 @@ reconcileStaged qh = unlessM (Git.Config.isBare <$> gitRepo) $ do -- against next time. inRepo $ update' lastindexref newtree fastDebug "Database.Keys" "reconcileStaged end" + return (DbTablesChanged True True) -- git write-tree will fail if the index is locked or when there is -- a merge conflict. To get up-to-date with the current index, -- diff --staged with the old index tree. The current index tree @@ -307,6 +321,7 @@ reconcileStaged qh = unlessM (Git.Config.isBare <$> gitRepo) $ do void $ updatetodiff g Nothing "--staged" (procmergeconflictdiff mdfeeder) fastDebug "Database.Keys" "reconcileStaged end" + return (DbTablesChanged True True) updatetodiff g old new processor = do (l, cleanup) <- pipeNullSplit' (diff old new) g @@ -482,3 +497,9 @@ reconcileStaged qh = unlessM (Git.Config.isBare <$> gitRepo) $ do largediff :: Int largediff = 1000 +{- Normally the keys database is updated incrementally when opened, + - by reconcileStaged. Calling this explicitly allows running the + - update at an earlier point. + -} +updateDatabase :: Annex () +updateDatabase = runWriter ContentTable (const noop) diff --git a/Database/Keys/Handle.hs b/Database/Keys/Handle.hs index ed7cc6e6c8..1e4a85427b 100644 --- a/Database/Keys/Handle.hs +++ b/Database/Keys/Handle.hs @@ -15,6 +15,7 @@ module Database.Keys.Handle ( ) where import qualified Database.Queue as H +import Database.Keys.Tables import Utility.Exception import Utility.DebugLocks @@ -29,7 +30,7 @@ newtype DbHandle = DbHandle (MVar DbState) -- The database can be closed or open, but it also may have been -- tried to open (for read) and didn't exist yet or is not readable. -data DbState = DbClosed | DbOpen H.DbQueue | DbUnavailable +data DbState = DbClosed | DbOpen (H.DbQueue, DbTablesChanged) | DbUnavailable newDbHandle :: IO DbHandle newDbHandle = DbHandle <$> newMVar DbClosed @@ -52,15 +53,17 @@ withDbState (DbHandle mvar) a = do return v flushDbQueue :: DbHandle -> IO () -flushDbQueue (DbHandle mvar) = go =<< debugLocks (readMVar mvar) +flushDbQueue h = withDbState h go where - go (DbOpen qh) = H.flushDbQueue qh - go _ = return () + go (DbOpen (qh, _)) = do + H.flushDbQueue qh + return ((), DbOpen (qh, mempty)) + go st = return ((), st) closeDbHandle :: DbHandle -> IO () closeDbHandle h = withDbState h go where - go (DbOpen qh) = do + go (DbOpen (qh, _)) = do H.closeDbQueue qh return ((), DbClosed) go st = return ((), st) diff --git a/Database/Keys/Tables.hs b/Database/Keys/Tables.hs new file mode 100644 index 0000000000..ab6a4fb757 --- /dev/null +++ b/Database/Keys/Tables.hs @@ -0,0 +1,38 @@ +{- Keeping track of which tables in the keys database have changed + - + - Copyright 2022 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Database.Keys.Tables where + +import Data.Monoid +import qualified Data.Semigroup as Sem +import Prelude + +data DbTable = AssociatedTable | ContentTable + deriving (Eq, Show) + +data DbTablesChanged = DbTablesChanged + { associatedTable :: Bool + , contentTable :: Bool + } + deriving (Show) + +instance Sem.Semigroup DbTablesChanged where + a <> b = DbTablesChanged + { associatedTable = associatedTable a || associatedTable b + , contentTable = contentTable a || contentTable b + } + +instance Monoid DbTablesChanged where + mempty = DbTablesChanged False False + +addDbTable :: DbTablesChanged -> DbTable -> DbTablesChanged +addDbTable ts AssociatedTable = ts { associatedTable = True } +addDbTable ts ContentTable = ts { contentTable = True } + +isDbTableChanged :: DbTablesChanged -> DbTable -> Bool +isDbTableChanged ts AssociatedTable = associatedTable ts +isDbTableChanged ts ContentTable = contentTable ts diff --git a/Upgrade/V7.hs b/Upgrade/V7.hs index 28e808b309..219c11ed14 100644 --- a/Upgrade/V7.hs +++ b/Upgrade/V7.hs @@ -16,6 +16,7 @@ import Types.Upgrade import Annex.CatFile import qualified Database.Keys import qualified Database.Keys.SQL +import Database.Keys.Tables import qualified Git.LsFiles as LsFiles import qualified Git import Git.FilePath @@ -114,8 +115,9 @@ populateKeysDb = unlessM isBareRepo $ do Nothing -> noop Just k -> do topf <- inRepo $ toTopFilePath $ toRawFilePath f - Database.Keys.runWriter $ \h -> liftIO $ do + Database.Keys.runWriter AssociatedTable $ \h -> liftIO $ Database.Keys.SQL.addAssociatedFile k topf h + Database.Keys.runWriter ContentTable $ \h -> liftIO $ Database.Keys.SQL.addInodeCaches k [ic] h liftIO $ void cleanup Database.Keys.closeDb diff --git a/git-annex.cabal b/git-annex.cabal index ce0cd60ea7..c60e627682 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -830,6 +830,7 @@ Executable git-annex Database.Init Database.Keys Database.Keys.Handle + Database.Keys.Tables Database.Keys.SQL Database.Queue Database.Types