From 2b014f1a8b6ca5501c7e9ea957ffc7bda1fc2e33 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 18 Nov 2022 13:58:35 -0400 Subject: [PATCH] don't frontload reconcileStaged in git-annex init init: Avoid scanning for annexed files, which can be lengthy in a large repository. Instead that scan is done on demand. This lets git-annex init be run and some query commands be used in a repository without waiting. Note that autoinit already behaved this way, so while this will mean some commands like git-annex get/unlock/add will do the scan the first time run, that is not really a significant behavior change. And, it's really better to have a consistent behavior. The reason for the inconsistency was a strange bug discussed in b3c4579c7907147a496bdf2c73b42238d8b239d6. Avoiding reconcileStaged in init will keep avoiding whatever that was. Sponsored-by: Dartmouth College's DANDI project --- Annex/Init.hs | 17 +++++++---------- Assistant/MakeRepo.hs | 2 +- CHANGELOG | 4 +++- Command/ConfigList.hs | 2 +- Command/Init.hs | 2 +- Command/Reinit.hs | 2 +- Command/Upgrade.hs | 2 +- ...14_8c3b13806adb731435b346a64990527b._comment | 11 +++++++++++ 8 files changed, 26 insertions(+), 16 deletions(-) diff --git a/Annex/Init.hs b/Annex/Init.hs index 94f3ed5079..8b86572bec 100644 --- a/Annex/Init.hs +++ b/Annex/Init.hs @@ -37,7 +37,6 @@ import Types.RepoVersion import Annex.Version import Annex.Difference import Annex.UUID -import Annex.WorkTree import Annex.Fixup import Annex.Path import Config @@ -102,8 +101,8 @@ genDescription Nothing = do Right username -> [username, at, hostname, ":", reldir] Left _ -> [hostname, ":", reldir] -initialize :: Bool -> Maybe String -> Maybe RepoVersion -> Annex () -initialize autoinit mdescription mversion = checkInitializeAllowed $ \initallowed -> do +initialize :: Maybe String -> Maybe RepoVersion -> Annex () +initialize mdescription mversion = checkInitializeAllowed $ \initallowed -> do {- Has to come before any commits are made as the shared - clone heuristic expects no local objects. -} sharedclone <- checkSharedClone @@ -113,7 +112,7 @@ initialize autoinit mdescription mversion = checkInitializeAllowed $ \initallowe ensureCommit $ Annex.Branch.create prepUUID - initialize' autoinit mversion initallowed + initialize' mversion initallowed initSharedClone sharedclone @@ -125,8 +124,8 @@ initialize autoinit mdescription mversion = checkInitializeAllowed $ \initallowe -- Everything except for uuid setup, shared clone setup, and initial -- description. -initialize' :: Bool -> Maybe RepoVersion -> InitializeAllowed -> Annex () -initialize' autoinit mversion _initallowed = do +initialize' :: Maybe RepoVersion -> InitializeAllowed -> Annex () +initialize' mversion _initallowed = do checkLockSupport checkFifoSupport checkCrippledFileSystem @@ -143,8 +142,6 @@ initialize' autoinit mversion _initallowed = do unlessM isBareRepo $ do hookWrite postCheckoutHook hookWrite postMergeHook - unless autoinit $ - scanAnnexedFiles AdjustedBranch.checkAdjustedClone >>= \case AdjustedBranch.InAdjustedClone -> return () @@ -206,7 +203,7 @@ ensureInitialized remotelist = getInitializedVersion >>= maybe needsinit checkUp where needsinit = ifM autoInitializeAllowed ( do - tryNonAsync (initialize True Nothing Nothing) >>= \case + tryNonAsync (initialize Nothing Nothing) >>= \case Right () -> noop Left e -> giveup $ show e ++ "\n" ++ "git-annex: automatic initialization failed due to above problems" @@ -259,7 +256,7 @@ autoInitialize remotelist = getInitializedVersion >>= maybe needsinit checkUpgra where needsinit = whenM (initializeAllowed <&&> autoInitializeAllowed) $ do - initialize True Nothing Nothing + initialize Nothing Nothing autoEnableSpecialRemotes remotelist {- Checks if a repository is initialized. Does not check version for ugrade. -} diff --git a/Assistant/MakeRepo.hs b/Assistant/MakeRepo.hs index 632c4abda5..bad4951b1d 100644 --- a/Assistant/MakeRepo.hs +++ b/Assistant/MakeRepo.hs @@ -85,7 +85,7 @@ initRepo False _ dir desc mgroup = inDir dir $ do initRepo' :: Maybe String -> Maybe StandardGroup -> Annex () initRepo' desc mgroup = unlessM isInitialized $ do - initialize False desc Nothing + initialize desc Nothing u <- getUUID maybe noop (defaultStandardGroup u) mgroup {- Ensure branch gets committed right away so it is diff --git a/CHANGELOG b/CHANGELOG index a8b06f1412..d193ceca85 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,9 @@ git-annex (10.20221105) UNRELEASED; urgency=medium * Support quettabyte and yottabyte. - * Sped up the initial scanning for annexed files by 21%. + * Sped up the initial scan for annexed files by 21%. + * init: Avoid scanning for annexed files, which can be lengthy in a + large repository. Instead that scan is done on demand. -- Joey Hess Fri, 18 Nov 2022 12:58:06 -0400 diff --git a/Command/ConfigList.hs b/Command/ConfigList.hs index d259b87409..bb33f7102b 100644 --- a/Command/ConfigList.hs +++ b/Command/ConfigList.hs @@ -47,7 +47,7 @@ findOrGenUUID = do else ifM (Annex.Branch.hasSibling <||> (isJust <$> Fields.getField Fields.autoInit)) ( do liftIO checkNotReadOnly - initialize True Nothing Nothing + initialize Nothing Nothing getUUID , return NoUUID ) diff --git a/Command/Init.hs b/Command/Init.hs index 8670ea221e..69e7516472 100644 --- a/Command/Init.hs +++ b/Command/Init.hs @@ -75,7 +75,7 @@ perform os = do Just v | v /= wantversion -> giveup $ "This repository is already a initialized with version " ++ show (fromRepoVersion v) ++ ", not changing to requested version." _ -> noop - initialize False + initialize (if null (initDesc os) then Nothing else Just (initDesc os)) (initVersion os) unless (noAutoEnable os) diff --git a/Command/Reinit.hs b/Command/Reinit.hs index 42d47a0141..d11d807ab4 100644 --- a/Command/Reinit.hs +++ b/Command/Reinit.hs @@ -35,6 +35,6 @@ perform s = do then return $ toUUID s else Remote.nameToUUID s storeUUID u - checkInitializeAllowed $ initialize' False Nothing + checkInitializeAllowed $ initialize' Nothing Annex.SpecialRemote.autoEnable next $ return True diff --git a/Command/Upgrade.hs b/Command/Upgrade.hs index f2f05122fc..77f569a15b 100644 --- a/Command/Upgrade.hs +++ b/Command/Upgrade.hs @@ -45,6 +45,6 @@ start (UpgradeOptions { autoOnly = True }) = start _ = starting "upgrade" (ActionItemOther Nothing) (SeekInput []) $ do whenM (isNothing <$> getVersion) $ do - initialize False Nothing Nothing + initialize Nothing Nothing r <- upgrade False latestVersion next $ return r diff --git a/doc/bugs/performance_regression__63___init_takes_times_more/comment_14_8c3b13806adb731435b346a64990527b._comment b/doc/bugs/performance_regression__63___init_takes_times_more/comment_14_8c3b13806adb731435b346a64990527b._comment index 8c6505d068..eedca77d66 100644 --- a/doc/bugs/performance_regression__63___init_takes_times_more/comment_14_8c3b13806adb731435b346a64990527b._comment +++ b/doc/bugs/performance_regression__63___init_takes_times_more/comment_14_8c3b13806adb731435b346a64990527b._comment @@ -5,4 +5,15 @@ content=""" Implemented the two optimisations discussed above, and init in that repository dropped from 24 seconds to 19 seconds, a 21% speedup. + +I think that's as fast as reconcileStaged is likely to get without +some deep optimisation of the persistent library. + +Then I realized that `git-annex init` does not really need to scan for +associated files. That can be done later, when running a command that needs +to access the keys database. Indeed, when git-annex is used in a clone of +an annexed repo without explicitly running `git-annex init`, that's what +it already did. I've implemented that, so now `git-annex init` takes 3 +seconds or so. The price will be paid later, the first time running a +`git-annex add` or `git-annex unlock` or `git-annex get`. """]]