From 7f742589f95b8f5ae3e71ad3261c8cd97e29fa41 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 8 Jun 2021 11:09:15 -0400 Subject: [PATCH] claw back annexed file scan speedup Following commit c941ab6f5b98becfa2880cb69cbc2c4a39057cd9, this avoids the second, redundant scan when annex.thin is not set. The benchmark now runs in 35.5 seconds, down from 40 seconds. Note that the inode cache of the annex object has to be passed to addInodeCaches now, because it might not already be in the inode caches, unlike previously. Sponsored-by: Dartmouth College's Datalad project --- Annex/WorkTree.hs | 19 ++++++++++--------- Database/Keys.hs | 20 ++++++++++++++------ 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs index 30e7366396..ac9c49b27d 100644 --- a/Annex/WorkTree.hs +++ b/Annex/WorkTree.hs @@ -86,15 +86,16 @@ scanAnnexedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $ d -- The above tries to populate pointer files, but one thing it -- is not able to handle is populating a pointer file when the -- annex object file already exists, but its inode is not yet - -- cached. So, the rest of this makes another pass over the - -- tree to do that. - g <- Annex.gitRepo - (l, cleanup) <- inRepo $ Git.LsTree.lsTree - Git.LsTree.LsTreeRecursive - (Git.LsTree.LsTreeLong True) - Git.Ref.headRef - catObjectStreamLsTree l want g go - liftIO $ void cleanup + -- cached and annex.thin is set. So, the rest of this makes + -- another pass over the tree to do that. + whenM (annexThin <$> Annex.getGitConfig) $ do + g <- Annex.gitRepo + (l, cleanup) <- inRepo $ Git.LsTree.lsTree + Git.LsTree.LsTreeRecursive + (Git.LsTree.LsTreeLong True) + Git.Ref.headRef + catObjectStreamLsTree l want g go + liftIO $ void cleanup where -- Want to process symlinks, and regular files. want i = case Git.Types.toTreeItemType (Git.LsTree.mode i) of diff --git a/Database/Keys.hs b/Database/Keys.hs index ac8aa7f5bd..aca13f94e2 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -362,17 +362,25 @@ reconcileStaged qh = do procmergeconflictdiff _ _ conflicted = return conflicted reconcilepointerfile file key = do - caches <- liftIO $ SQL.getInodeCaches key (SQL.ReadHandle qh) - keyloc <- calcRepo (gitAnnexLocation key) - keypopulated <- sameInodeCache keyloc caches + ics <- liftIO $ SQL.getInodeCaches key (SQL.ReadHandle qh) + obj <- calcRepo (gitAnnexLocation key) + objic <- withTSDelta (liftIO . genInodeCache obj) + -- Like inAnnex, check the annex object's inode cache + -- when annex.thin is set. + keypopulated <- ifM (annexThin <$> Annex.getGitConfig) + ( maybe (pure False) (`elemInodeCaches` ics) objic + , pure (isJust objic) + ) p <- fromRepo $ fromTopFilePath file - filepopulated <- sameInodeCache p caches + filepopulated <- sameInodeCache p ics case (keypopulated, filepopulated) of (True, False) -> - populatePointerFile (Restage True) key keyloc p >>= \case + populatePointerFile (Restage True) key obj p >>= \case Nothing -> return () Just ic -> liftIO $ - SQL.addInodeCaches key [ic] (SQL.WriteHandle qh) + SQL.addInodeCaches key + (catMaybes [Just ic, objic]) + (SQL.WriteHandle qh) (False, True) -> depopulatePointerFile key p _ -> return ()