diff --git a/CHANGELOG b/CHANGELOG index c4f81d8a80..eed419b50f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,7 @@ git-annex (10.20221105) UNRELEASED; urgency=medium * Support quettabyte and yottabyte. + * Sped up the initial scanning for annexed files by 15%. -- Joey Hess Fri, 18 Nov 2022 12:58:06 -0400 diff --git a/Database/Keys.hs b/Database/Keys.hs index 45f8d2f851..9e1043edae 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -132,10 +132,10 @@ openDb forwrite _ = do let db = dbdir P. "db" dbexists <- liftIO $ R.doesPathExist db case dbexists of - True -> open db + True -> open db False False -> do initDb db SQL.createTables - open db + open db True where -- If permissions don't allow opening the database, and it's being -- opened for read, treat it as if it does not exist. @@ -143,9 +143,9 @@ openDb forwrite _ = do | forwrite = throwM e | otherwise = return DbUnavailable - open db = do + open db dbisnew = do qh <- liftIO $ H.openDbQueue db SQL.containedTable - tc <- reconcileStaged qh + tc <- reconcileStaged dbisnew qh return $ DbOpen (qh, tc) {- Closes the database if it was open. Any writes will be flushed to it. @@ -260,8 +260,8 @@ isInodeKnown i s = or <$> runReaderIO ContentTable - So when using getAssociatedFiles, have to make sure the file still - is an associated file. -} -reconcileStaged :: H.DbQueue -> Annex DbTablesChanged -reconcileStaged qh = ifM (Git.Config.isBare <$> gitRepo) +reconcileStaged :: Bool -> H.DbQueue -> Annex DbTablesChanged +reconcileStaged dbisnew qh = ifM (Git.Config.isBare <$> gitRepo) ( return mempty , do gitindex <- inRepo currentIndexFile @@ -384,7 +384,7 @@ reconcileStaged qh = ifM (Git.Config.isBare <$> gitRepo) Nothing -> return False send mdfeeder (Ref dstsha) $ \case Just key -> do - liftIO $ SQL.addAssociatedFile key + liftIO $ addassociatedfile key (asTopFilePath file) (SQL.WriteHandle qh) when (dstmode /= fmtTreeItemType TreeSymlink) $ @@ -497,6 +497,18 @@ reconcileStaged qh = ifM (Git.Config.isBare <$> gitRepo) largediff :: Int largediff = 1000 + -- When the database is known to have been newly created and empty + -- before reconcileStaged started, it is more efficient to use + -- newAssociatedFile. It's safe to use it here because this is run + -- with a lock held that blocks any other process that opens the + -- database, and when the database is newly created, there is no + -- existing process that has it open already. And it's not possible + -- for reconcileStaged to call this twice on the same filename with + -- two different keys. + addassociatedfile + | dbisnew = SQL.newAssociatedFile + | otherwise = SQL.addAssociatedFile + {- Normally the keys database is updated incrementally when opened, - by reconcileStaged. Calling this explicitly allows running the - update at an earlier point. diff --git a/Database/Keys/SQL.hs b/Database/Keys/SQL.hs index cab4c58759..54e1a59bf7 100644 --- a/Database/Keys/SQL.hs +++ b/Database/Keys/SQL.hs @@ -1,6 +1,6 @@ {- Sqlite database of information about Keys - - - Copyright 2015-2021 Joey Hess + - Copyright 2015-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -88,6 +88,15 @@ addAssociatedFile k f = queueDb $ where af = SFilePath (getTopFilePath f) +-- Faster than addAssociatedFile, but only safe to use when the file +-- was not associated with a different key before, as it does not delete +-- any old key. +newAssociatedFile :: Key -> TopFilePath -> WriteHandle -> IO () +newAssociatedFile k f = queueDb $ + void $ insert $ Associated k af + where + af = SFilePath (getTopFilePath f) + {- Note that the files returned were once associated with the key, but - some of them may not be any longer. -} getAssociatedFiles :: Key -> ReadHandle -> IO [TopFilePath] diff --git a/doc/bugs/performance_regression__63___init_takes_times_more/comment_13_a79fcbe80060d11582989a9fc31d4a92._comment b/doc/bugs/performance_regression__63___init_takes_times_more/comment_13_a79fcbe80060d11582989a9fc31d4a92._comment index 36474beabe..02dac893e3 100644 --- a/doc/bugs/performance_regression__63___init_takes_times_more/comment_13_a79fcbe80060d11582989a9fc31d4a92._comment +++ b/doc/bugs/performance_regression__63___init_takes_times_more/comment_13_a79fcbe80060d11582989a9fc31d4a92._comment @@ -12,4 +12,6 @@ This will need some care to be implemented safely... I benchmarked it, and using insertUnique is no faster, but using insert is. This would be a 15% speed up. + +Update: Implemented this optimisation. """]]