Sped up the initial scanning for annexed files by 15%
Avoids database querying overhead when the database is newly created. In the large repository where git-annex init took 24 seconds, this reduced the time to 20.47 seconds, a speedup of around 15%. Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
parent
a3e9a0ae27
commit
8fcee4ac9d
4 changed files with 32 additions and 8 deletions
|
@ -1,6 +1,7 @@
|
|||
git-annex (10.20221105) UNRELEASED; urgency=medium
|
||||
|
||||
* Support quettabyte and yottabyte.
|
||||
* Sped up the initial scanning for annexed files by 15%.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Fri, 18 Nov 2022 12:58:06 -0400
|
||||
|
||||
|
|
|
@ -132,10 +132,10 @@ openDb forwrite _ = do
|
|||
let db = dbdir P.</> "db"
|
||||
dbexists <- liftIO $ R.doesPathExist db
|
||||
case dbexists of
|
||||
True -> open db
|
||||
True -> open db False
|
||||
False -> do
|
||||
initDb db SQL.createTables
|
||||
open db
|
||||
open db True
|
||||
where
|
||||
-- If permissions don't allow opening the database, and it's being
|
||||
-- opened for read, treat it as if it does not exist.
|
||||
|
@ -143,9 +143,9 @@ openDb forwrite _ = do
|
|||
| forwrite = throwM e
|
||||
| otherwise = return DbUnavailable
|
||||
|
||||
open db = do
|
||||
open db dbisnew = do
|
||||
qh <- liftIO $ H.openDbQueue db SQL.containedTable
|
||||
tc <- reconcileStaged qh
|
||||
tc <- reconcileStaged dbisnew qh
|
||||
return $ DbOpen (qh, tc)
|
||||
|
||||
{- Closes the database if it was open. Any writes will be flushed to it.
|
||||
|
@ -260,8 +260,8 @@ isInodeKnown i s = or <$> runReaderIO ContentTable
|
|||
- So when using getAssociatedFiles, have to make sure the file still
|
||||
- is an associated file.
|
||||
-}
|
||||
reconcileStaged :: H.DbQueue -> Annex DbTablesChanged
|
||||
reconcileStaged qh = ifM (Git.Config.isBare <$> gitRepo)
|
||||
reconcileStaged :: Bool -> H.DbQueue -> Annex DbTablesChanged
|
||||
reconcileStaged dbisnew qh = ifM (Git.Config.isBare <$> gitRepo)
|
||||
( return mempty
|
||||
, do
|
||||
gitindex <- inRepo currentIndexFile
|
||||
|
@ -384,7 +384,7 @@ reconcileStaged qh = ifM (Git.Config.isBare <$> gitRepo)
|
|||
Nothing -> return False
|
||||
send mdfeeder (Ref dstsha) $ \case
|
||||
Just key -> do
|
||||
liftIO $ SQL.addAssociatedFile key
|
||||
liftIO $ addassociatedfile key
|
||||
(asTopFilePath file)
|
||||
(SQL.WriteHandle qh)
|
||||
when (dstmode /= fmtTreeItemType TreeSymlink) $
|
||||
|
@ -497,6 +497,18 @@ reconcileStaged qh = ifM (Git.Config.isBare <$> gitRepo)
|
|||
largediff :: Int
|
||||
largediff = 1000
|
||||
|
||||
-- When the database is known to have been newly created and empty
|
||||
-- before reconcileStaged started, it is more efficient to use
|
||||
-- newAssociatedFile. It's safe to use it here because this is run
|
||||
-- with a lock held that blocks any other process that opens the
|
||||
-- database, and when the database is newly created, there is no
|
||||
-- existing process that has it open already. And it's not possible
|
||||
-- for reconcileStaged to call this twice on the same filename with
|
||||
-- two different keys.
|
||||
addassociatedfile
|
||||
| dbisnew = SQL.newAssociatedFile
|
||||
| otherwise = SQL.addAssociatedFile
|
||||
|
||||
{- Normally the keys database is updated incrementally when opened,
|
||||
- by reconcileStaged. Calling this explicitly allows running the
|
||||
- update at an earlier point.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- Sqlite database of information about Keys
|
||||
-
|
||||
- Copyright 2015-2021 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2015-2022 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -88,6 +88,15 @@ addAssociatedFile k f = queueDb $
|
|||
where
|
||||
af = SFilePath (getTopFilePath f)
|
||||
|
||||
-- Queues a plain insert of the (key, file) association. This is cheaper
-- than addAssociatedFile because no row for an old key is deleted first,
-- so it must only be used when the file was not previously associated
-- with a different key.
newAssociatedFile :: Key -> TopFilePath -> WriteHandle -> IO ()
newAssociatedFile key file = queueDb (void (insert row))
  where
    row = Associated key (SFilePath (getTopFilePath file))
|
||||
|
||||
{- Note that the files returned were once associated with the key, but
|
||||
- some of them may not be any longer. -}
|
||||
getAssociatedFiles :: Key -> ReadHandle -> IO [TopFilePath]
|
||||
|
|
|
@ -12,4 +12,6 @@ This will need some care to be implemented safely...
|
|||
|
||||
I benchmarked it, and using insertUnique is no faster, but using insert is.
|
||||
This would be a 15% speed up.
|
||||
|
||||
Update: Implemented this optimisation.
|
||||
"""]]
|
||||
|
|
Loading…
Reference in a new issue