claw back annexed file scan speedup
Following commit c941ab6f5b, this avoids
the second, redundant scan when annex.thin is not set.
The benchmark now runs in 35.5 seconds, down from 40 seconds.
Note that the inode cache of the annex object has to be passed to
addInodeCaches now, because it might not already be in the inode caches,
unlike previously.
Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
parent
ec1f2f246b
commit
7f742589f9
2 changed files with 24 additions and 15 deletions
|
@ -86,15 +86,16 @@ scanAnnexedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $ d
|
||||||
-- The above tries to populate pointer files, but one thing it
|
-- The above tries to populate pointer files, but one thing it
|
||||||
-- is not able to handle is populating a pointer file when the
|
-- is not able to handle is populating a pointer file when the
|
||||||
-- annex object file already exists, but its inode is not yet
|
-- annex object file already exists, but its inode is not yet
|
||||||
-- cached. So, the rest of this makes another pass over the
|
-- cached and annex.thin is set. So, the rest of this makes
|
||||||
-- tree to do that.
|
-- another pass over the tree to do that.
|
||||||
g <- Annex.gitRepo
|
whenM (annexThin <$> Annex.getGitConfig) $ do
|
||||||
(l, cleanup) <- inRepo $ Git.LsTree.lsTree
|
g <- Annex.gitRepo
|
||||||
Git.LsTree.LsTreeRecursive
|
(l, cleanup) <- inRepo $ Git.LsTree.lsTree
|
||||||
(Git.LsTree.LsTreeLong True)
|
Git.LsTree.LsTreeRecursive
|
||||||
Git.Ref.headRef
|
(Git.LsTree.LsTreeLong True)
|
||||||
catObjectStreamLsTree l want g go
|
Git.Ref.headRef
|
||||||
liftIO $ void cleanup
|
catObjectStreamLsTree l want g go
|
||||||
|
liftIO $ void cleanup
|
||||||
where
|
where
|
||||||
-- Want to process symlinks, and regular files.
|
-- Want to process symlinks, and regular files.
|
||||||
want i = case Git.Types.toTreeItemType (Git.LsTree.mode i) of
|
want i = case Git.Types.toTreeItemType (Git.LsTree.mode i) of
|
||||||
|
|
|
@ -362,17 +362,25 @@ reconcileStaged qh = do
|
||||||
procmergeconflictdiff _ _ conflicted = return conflicted
|
procmergeconflictdiff _ _ conflicted = return conflicted
|
||||||
|
|
||||||
reconcilepointerfile file key = do
|
reconcilepointerfile file key = do
|
||||||
caches <- liftIO $ SQL.getInodeCaches key (SQL.ReadHandle qh)
|
ics <- liftIO $ SQL.getInodeCaches key (SQL.ReadHandle qh)
|
||||||
keyloc <- calcRepo (gitAnnexLocation key)
|
obj <- calcRepo (gitAnnexLocation key)
|
||||||
keypopulated <- sameInodeCache keyloc caches
|
objic <- withTSDelta (liftIO . genInodeCache obj)
|
||||||
|
-- Like inAnnex, check the annex object's inode cache
|
||||||
|
-- when annex.thin is set.
|
||||||
|
keypopulated <- ifM (annexThin <$> Annex.getGitConfig)
|
||||||
|
( maybe (pure False) (`elemInodeCaches` ics) objic
|
||||||
|
, pure (isJust objic)
|
||||||
|
)
|
||||||
p <- fromRepo $ fromTopFilePath file
|
p <- fromRepo $ fromTopFilePath file
|
||||||
filepopulated <- sameInodeCache p caches
|
filepopulated <- sameInodeCache p ics
|
||||||
case (keypopulated, filepopulated) of
|
case (keypopulated, filepopulated) of
|
||||||
(True, False) ->
|
(True, False) ->
|
||||||
populatePointerFile (Restage True) key keyloc p >>= \case
|
populatePointerFile (Restage True) key obj p >>= \case
|
||||||
Nothing -> return ()
|
Nothing -> return ()
|
||||||
Just ic -> liftIO $
|
Just ic -> liftIO $
|
||||||
SQL.addInodeCaches key [ic] (SQL.WriteHandle qh)
|
SQL.addInodeCaches key
|
||||||
|
(catMaybes [Just ic, objic])
|
||||||
|
(SQL.WriteHandle qh)
|
||||||
(False, True) -> depopulatePointerFile key p
|
(False, True) -> depopulatePointerFile key p
|
||||||
_ -> return ()
|
_ -> return ()
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue