Sped up the initial scanning for annexed files by 15%
Avoids database querying overhead when the database is newly created. In the large repository where git-annex init took 24 seconds, this sped it up to 20.47 seconds, a speedup of around 15%.
Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
parent
a3e9a0ae27
commit
8fcee4ac9d
4 changed files with 32 additions and 8 deletions
|
@ -1,6 +1,7 @@
|
||||||
git-annex (10.20221105) UNRELEASED; urgency=medium
|
git-annex (10.20221105) UNRELEASED; urgency=medium
|
||||||
|
|
||||||
* Support quettabyte and yottabyte.
|
* Support quettabyte and yottabyte.
|
||||||
|
* Sped up the initial scanning for annexed files by 15%.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Fri, 18 Nov 2022 12:58:06 -0400
|
-- Joey Hess <id@joeyh.name> Fri, 18 Nov 2022 12:58:06 -0400
|
||||||
|
|
||||||
|
|
|
@ -132,10 +132,10 @@ openDb forwrite _ = do
|
||||||
let db = dbdir P.</> "db"
|
let db = dbdir P.</> "db"
|
||||||
dbexists <- liftIO $ R.doesPathExist db
|
dbexists <- liftIO $ R.doesPathExist db
|
||||||
case dbexists of
|
case dbexists of
|
||||||
True -> open db
|
True -> open db False
|
||||||
False -> do
|
False -> do
|
||||||
initDb db SQL.createTables
|
initDb db SQL.createTables
|
||||||
open db
|
open db True
|
||||||
where
|
where
|
||||||
-- If permissions don't allow opening the database, and it's being
|
-- If permissions don't allow opening the database, and it's being
|
||||||
-- opened for read, treat it as if it does not exist.
|
-- opened for read, treat it as if it does not exist.
|
||||||
|
@ -143,9 +143,9 @@ openDb forwrite _ = do
|
||||||
| forwrite = throwM e
|
| forwrite = throwM e
|
||||||
| otherwise = return DbUnavailable
|
| otherwise = return DbUnavailable
|
||||||
|
|
||||||
open db = do
|
open db dbisnew = do
|
||||||
qh <- liftIO $ H.openDbQueue db SQL.containedTable
|
qh <- liftIO $ H.openDbQueue db SQL.containedTable
|
||||||
tc <- reconcileStaged qh
|
tc <- reconcileStaged dbisnew qh
|
||||||
return $ DbOpen (qh, tc)
|
return $ DbOpen (qh, tc)
|
||||||
|
|
||||||
{- Closes the database if it was open. Any writes will be flushed to it.
|
{- Closes the database if it was open. Any writes will be flushed to it.
|
||||||
|
@ -260,8 +260,8 @@ isInodeKnown i s = or <$> runReaderIO ContentTable
|
||||||
- So when using getAssociatedFiles, have to make sure the file still
|
- So when using getAssociatedFiles, have to make sure the file still
|
||||||
- is an associated file.
|
- is an associated file.
|
||||||
-}
|
-}
|
||||||
reconcileStaged :: H.DbQueue -> Annex DbTablesChanged
|
reconcileStaged :: Bool -> H.DbQueue -> Annex DbTablesChanged
|
||||||
reconcileStaged qh = ifM (Git.Config.isBare <$> gitRepo)
|
reconcileStaged dbisnew qh = ifM (Git.Config.isBare <$> gitRepo)
|
||||||
( return mempty
|
( return mempty
|
||||||
, do
|
, do
|
||||||
gitindex <- inRepo currentIndexFile
|
gitindex <- inRepo currentIndexFile
|
||||||
|
@ -384,7 +384,7 @@ reconcileStaged qh = ifM (Git.Config.isBare <$> gitRepo)
|
||||||
Nothing -> return False
|
Nothing -> return False
|
||||||
send mdfeeder (Ref dstsha) $ \case
|
send mdfeeder (Ref dstsha) $ \case
|
||||||
Just key -> do
|
Just key -> do
|
||||||
liftIO $ SQL.addAssociatedFile key
|
liftIO $ addassociatedfile key
|
||||||
(asTopFilePath file)
|
(asTopFilePath file)
|
||||||
(SQL.WriteHandle qh)
|
(SQL.WriteHandle qh)
|
||||||
when (dstmode /= fmtTreeItemType TreeSymlink) $
|
when (dstmode /= fmtTreeItemType TreeSymlink) $
|
||||||
|
@ -497,6 +497,18 @@ reconcileStaged qh = ifM (Git.Config.isBare <$> gitRepo)
|
||||||
largediff :: Int
|
largediff :: Int
|
||||||
largediff = 1000
|
largediff = 1000
|
||||||
|
|
||||||
|
-- When the database is known to have been newly created and empty
|
||||||
|
-- before reconcileStaged started, it is more efficient to use
|
||||||
|
-- newAssociatedFile. It's safe to use it here because this is run
|
||||||
|
-- with a lock held that blocks any other process that opens the
|
||||||
|
-- database, and when the database is newly created, there is no
|
||||||
|
-- existing process that has it open already. And it's not possible
|
||||||
|
-- for reconcileStaged to call this twice on the same filename with
|
||||||
|
-- two different keys.
|
||||||
|
addassociatedfile
|
||||||
|
| dbisnew = SQL.newAssociatedFile
|
||||||
|
| otherwise = SQL.addAssociatedFile
|
||||||
|
|
||||||
{- Normally the keys database is updated incrementally when opened,
|
{- Normally the keys database is updated incrementally when opened,
|
||||||
- by reconcileStaged. Calling this explicitly allows running the
|
- by reconcileStaged. Calling this explicitly allows running the
|
||||||
- update at an earlier point.
|
- update at an earlier point.
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- Sqlite database of information about Keys
|
{- Sqlite database of information about Keys
|
||||||
-
|
-
|
||||||
- Copyright 2015-2021 Joey Hess <id@joeyh.name>
|
- Copyright 2015-2022 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -88,6 +88,15 @@ addAssociatedFile k f = queueDb $
|
||||||
where
|
where
|
||||||
af = SFilePath (getTopFilePath f)
|
af = SFilePath (getTopFilePath f)
|
||||||
|
|
||||||
|
-- Hands queueDb a plain insert of the Associated record for this key and
|
||||||
|
-- file, without deleting anything first. Faster than addAssociatedFile, but
|
||||||
|
-- only safe to use when the file was not associated with a different key before.
|
||||||
|
newAssociatedFile :: Key -> TopFilePath -> WriteHandle -> IO ()
|
||||||
|
newAssociatedFile k f = queueDb $
|
||||||
|
void $ insert $ Associated k af
|
||||||
|
where
|
||||||
|
af = SFilePath (getTopFilePath f)
|
||||||
|
|
||||||
{- Note that the files returned were once associated with the key, but
|
{- Note that the files returned were once associated with the key, but
|
||||||
- some of them may not be any longer. -}
|
- some of them may not be any longer. -}
|
||||||
getAssociatedFiles :: Key -> ReadHandle -> IO [TopFilePath]
|
getAssociatedFiles :: Key -> ReadHandle -> IO [TopFilePath]
|
||||||
|
|
|
@ -12,4 +12,6 @@ This will need some care to be implemented safely...
|
||||||
|
|
||||||
I benchmarked it, and using insertUnique is no faster, but using insert is.
|
I benchmarked it, and using insertUnique is no faster, but using insert is.
|
||||||
This would be a 15% speed up.
|
This would be a 15% speed up.
|
||||||
|
|
||||||
|
Update: Implemented this optimisation.
|
||||||
"""]]
|
"""]]
|
||||||
|
|
Loading…
Reference in a new issue