From e147ae07f4841180d420f20ce453a6cb85d8c4e2 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Jun 2021 09:24:59 -0400 Subject: [PATCH] remove supportUnlocked check that is not worth its overhead moveAnnex only gets to that check if the object file was not present before. So in the case where dup files are being added repeatedly, it will only run the first time, and so there's no significant speedup from doing it; all it avoids is a single sqlite lookup. Since MVar accesses do have overhead, it's better to optimise for the common case, where unlocked files are supported. removeAnnex is less clear cut, but I think mostly is skipped running on keys when the object has already been dropped, so similar reasoning applies. --- Annex/Content.hs | 22 +++++++++---------- ..._6e5121e066998a303cf68ebc53e9fc15._comment | 10 +++++++++ 2 files changed, 20 insertions(+), 12 deletions(-) create mode 100644 doc/bugs/significant_performance_regression_impacting_datal/comment_33_6e5121e066998a303cf68ebc53e9fc15._comment diff --git a/Annex/Content.hs b/Annex/Content.hs index 07daa14cce..12af39618c 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -340,13 +340,12 @@ moveAnnex key af src = ifM (checkSecureHashes' key) liftIO $ moveFile (fromRawFilePath src) (fromRawFilePath dest) - whenM (annexSupportUnlocked <$> Annex.getGitConfig) $ do - g <- Annex.gitRepo - fs <- map (`fromTopFilePath` g) - <$> Database.Keys.getAssociatedFiles key - unless (null fs) $ do - ics <- mapM (populatePointerFile (Restage True) key dest) fs - Database.Keys.storeInodeCaches' key [dest] (catMaybes ics) + g <- Annex.gitRepo + fs <- map (`fromTopFilePath` g) + <$> Database.Keys.getAssociatedFiles key + unless (null fs) $ do + ics <- mapM (populatePointerFile (Restage True) key dest) fs + Database.Keys.storeInodeCaches' key [dest] (catMaybes ics) ) alreadyhave = liftIO $ R.removeLink src @@ -503,11 +502,10 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file -> cleanObjectLoc key $ do secureErase file liftIO $ removeWhenExistsWith R.removeLink file - whenM (annexSupportUnlocked <$> Annex.getGitConfig) $ do - g <- Annex.gitRepo - mapM_ (\f -> void $ tryIO $ resetpointer $ fromTopFilePath f g) - =<< Database.Keys.getAssociatedFiles key - Database.Keys.removeInodeCaches key + g <- Annex.gitRepo + mapM_ (\f -> void $ tryIO $ resetpointer $ fromTopFilePath f g) + =<< Database.Keys.getAssociatedFiles key + Database.Keys.removeInodeCaches key where -- Check associated pointer file for modifications, and reset if -- it's unmodified. diff --git a/doc/bugs/significant_performance_regression_impacting_datal/comment_33_6e5121e066998a303cf68ebc53e9fc15._comment b/doc/bugs/significant_performance_regression_impacting_datal/comment_33_6e5121e066998a303cf68ebc53e9fc15._comment new file mode 100644 index 0000000000..deeeec00b8 --- /dev/null +++ b/doc/bugs/significant_performance_regression_impacting_datal/comment_33_6e5121e066998a303cf68ebc53e9fc15._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 33""" + date="2021-06-15T13:01:04Z" + content=""" +Oh, there's a much better solution: If the annex object file already exists +when ingesting a new file, skip populating other associated files. They +will have already been populated. moveAnnex has to check if the annex object +file already exists anyway, so this will have zero overhead. +"""]]