don't frontload reconcileStaged in git-annex init
init: Avoid scanning for annexed files, which can be lengthy in a
large repository. Instead that scan is done on demand. This lets git-annex
init be run and some query commands be used in a repository without
waiting.
Note that autoinit already behaved this way, so while this will mean some
commands like git-annex get/unlock/add will do the scan the first time they
are run, that is not really a significant behavior change.
And it's really better to have consistent behavior. The reason for
the inconsistency was a strange bug discussed in
b3c4579c79. Not running reconcileStaged in
init at all continues to avoid whatever that bug was.
Sponsored-by: Dartmouth College's DANDI project
	
	
This commit is contained in:
		
					parent
					
						
							
								c834d2025a
							
						
					
				
			
			
				commit
				
					
						2b014f1a8b
					
				
			
		
					 8 changed files with 26 additions and 16 deletions
				
			
		| 
						 | 
					@ -37,7 +37,6 @@ import Types.RepoVersion
 | 
				
			||||||
import Annex.Version
 | 
					import Annex.Version
 | 
				
			||||||
import Annex.Difference
 | 
					import Annex.Difference
 | 
				
			||||||
import Annex.UUID
 | 
					import Annex.UUID
 | 
				
			||||||
import Annex.WorkTree
 | 
					 | 
				
			||||||
import Annex.Fixup
 | 
					import Annex.Fixup
 | 
				
			||||||
import Annex.Path
 | 
					import Annex.Path
 | 
				
			||||||
import Config
 | 
					import Config
 | 
				
			||||||
| 
						 | 
					@ -102,8 +101,8 @@ genDescription Nothing = do
 | 
				
			||||||
		Right username -> [username, at, hostname, ":", reldir]
 | 
							Right username -> [username, at, hostname, ":", reldir]
 | 
				
			||||||
		Left _ -> [hostname, ":", reldir]
 | 
							Left _ -> [hostname, ":", reldir]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
initialize :: Bool -> Maybe String -> Maybe RepoVersion -> Annex ()
 | 
					initialize :: Maybe String -> Maybe RepoVersion -> Annex ()
 | 
				
			||||||
initialize autoinit mdescription mversion = checkInitializeAllowed $ \initallowed -> do
 | 
					initialize mdescription mversion = checkInitializeAllowed $ \initallowed -> do
 | 
				
			||||||
	{- Has to come before any commits are made as the shared
 | 
						{- Has to come before any commits are made as the shared
 | 
				
			||||||
	 - clone heuristic expects no local objects. -}
 | 
						 - clone heuristic expects no local objects. -}
 | 
				
			||||||
	sharedclone <- checkSharedClone
 | 
						sharedclone <- checkSharedClone
 | 
				
			||||||
| 
						 | 
					@ -113,7 +112,7 @@ initialize autoinit mdescription mversion = checkInitializeAllowed $ \initallowe
 | 
				
			||||||
	ensureCommit $ Annex.Branch.create
 | 
						ensureCommit $ Annex.Branch.create
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	prepUUID
 | 
						prepUUID
 | 
				
			||||||
	initialize' autoinit mversion initallowed
 | 
						initialize' mversion initallowed
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
	initSharedClone sharedclone
 | 
						initSharedClone sharedclone
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
| 
						 | 
					@ -125,8 +124,8 @@ initialize autoinit mdescription mversion = checkInitializeAllowed $ \initallowe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- Everything except for uuid setup, shared clone setup, and initial
 | 
					-- Everything except for uuid setup, shared clone setup, and initial
 | 
				
			||||||
-- description.
 | 
					-- description.
 | 
				
			||||||
initialize' :: Bool -> Maybe RepoVersion -> InitializeAllowed -> Annex ()
 | 
					initialize' :: Maybe RepoVersion -> InitializeAllowed -> Annex ()
 | 
				
			||||||
initialize' autoinit mversion _initallowed = do
 | 
					initialize' mversion _initallowed = do
 | 
				
			||||||
	checkLockSupport
 | 
						checkLockSupport
 | 
				
			||||||
	checkFifoSupport
 | 
						checkFifoSupport
 | 
				
			||||||
	checkCrippledFileSystem
 | 
						checkCrippledFileSystem
 | 
				
			||||||
| 
						 | 
					@ -143,8 +142,6 @@ initialize' autoinit mversion _initallowed = do
 | 
				
			||||||
	unlessM isBareRepo $ do
 | 
						unlessM isBareRepo $ do
 | 
				
			||||||
		hookWrite postCheckoutHook
 | 
							hookWrite postCheckoutHook
 | 
				
			||||||
		hookWrite postMergeHook
 | 
							hookWrite postMergeHook
 | 
				
			||||||
		unless autoinit $
 | 
					 | 
				
			||||||
			scanAnnexedFiles
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	AdjustedBranch.checkAdjustedClone >>= \case
 | 
						AdjustedBranch.checkAdjustedClone >>= \case
 | 
				
			||||||
		AdjustedBranch.InAdjustedClone -> return ()
 | 
							AdjustedBranch.InAdjustedClone -> return ()
 | 
				
			||||||
| 
						 | 
					@ -206,7 +203,7 @@ ensureInitialized remotelist = getInitializedVersion >>= maybe needsinit checkUp
 | 
				
			||||||
  where
 | 
					  where
 | 
				
			||||||
	needsinit = ifM autoInitializeAllowed
 | 
						needsinit = ifM autoInitializeAllowed
 | 
				
			||||||
		( do
 | 
							( do
 | 
				
			||||||
			tryNonAsync (initialize True Nothing Nothing) >>= \case
 | 
								tryNonAsync (initialize Nothing Nothing) >>= \case
 | 
				
			||||||
				Right () -> noop
 | 
									Right () -> noop
 | 
				
			||||||
				Left e -> giveup $ show e ++ "\n" ++
 | 
									Left e -> giveup $ show e ++ "\n" ++
 | 
				
			||||||
					"git-annex: automatic initialization failed due to above problems"
 | 
										"git-annex: automatic initialization failed due to above problems"
 | 
				
			||||||
| 
						 | 
					@ -259,7 +256,7 @@ autoInitialize remotelist = getInitializedVersion >>= maybe needsinit checkUpgra
 | 
				
			||||||
  where
 | 
					  where
 | 
				
			||||||
	needsinit =
 | 
						needsinit =
 | 
				
			||||||
		whenM (initializeAllowed <&&> autoInitializeAllowed) $ do
 | 
							whenM (initializeAllowed <&&> autoInitializeAllowed) $ do
 | 
				
			||||||
			initialize True Nothing Nothing
 | 
								initialize Nothing Nothing
 | 
				
			||||||
			autoEnableSpecialRemotes remotelist
 | 
								autoEnableSpecialRemotes remotelist
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{- Checks if a repository is initialized. Does not check version for ugrade. -}
 | 
					{- Checks if a repository is initialized. Does not check version for ugrade. -}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -85,7 +85,7 @@ initRepo False _ dir desc mgroup = inDir dir $ do
 | 
				
			||||||
 | 
					
 | 
				
			||||||
initRepo' :: Maybe String -> Maybe StandardGroup -> Annex ()
 | 
					initRepo' :: Maybe String -> Maybe StandardGroup -> Annex ()
 | 
				
			||||||
initRepo' desc mgroup = unlessM isInitialized $ do
 | 
					initRepo' desc mgroup = unlessM isInitialized $ do
 | 
				
			||||||
	initialize False desc Nothing
 | 
						initialize desc Nothing
 | 
				
			||||||
	u <- getUUID
 | 
						u <- getUUID
 | 
				
			||||||
	maybe noop (defaultStandardGroup u) mgroup
 | 
						maybe noop (defaultStandardGroup u) mgroup
 | 
				
			||||||
	{- Ensure branch gets committed right away so it is
 | 
						{- Ensure branch gets committed right away so it is
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,7 +1,9 @@
 | 
				
			||||||
git-annex (10.20221105) UNRELEASED; urgency=medium
 | 
					git-annex (10.20221105) UNRELEASED; urgency=medium
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  * Support quettabyte and yottabyte.
 | 
					  * Support quettabyte and yottabyte.
 | 
				
			||||||
  * Sped up the initial scanning for annexed files by 21%.
 | 
					  * Sped up the initial scan for annexed files by 21%.
 | 
				
			||||||
 | 
					  * init: Avoid scanning for annexed files, which can be lengthy in a
 | 
				
			||||||
 | 
					    large repository. Instead that scan is done on demand.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 -- Joey Hess <id@joeyh.name>  Fri, 18 Nov 2022 12:58:06 -0400
 | 
					 -- Joey Hess <id@joeyh.name>  Fri, 18 Nov 2022 12:58:06 -0400
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -47,7 +47,7 @@ findOrGenUUID = do
 | 
				
			||||||
		else ifM (Annex.Branch.hasSibling <||> (isJust <$> Fields.getField Fields.autoInit))
 | 
							else ifM (Annex.Branch.hasSibling <||> (isJust <$> Fields.getField Fields.autoInit))
 | 
				
			||||||
			( do
 | 
								( do
 | 
				
			||||||
				liftIO checkNotReadOnly
 | 
									liftIO checkNotReadOnly
 | 
				
			||||||
				initialize True Nothing Nothing
 | 
									initialize Nothing Nothing
 | 
				
			||||||
				getUUID
 | 
									getUUID
 | 
				
			||||||
			, return NoUUID
 | 
								, return NoUUID
 | 
				
			||||||
			)
 | 
								)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -75,7 +75,7 @@ perform os = do
 | 
				
			||||||
			Just v | v /= wantversion ->
 | 
								Just v | v /= wantversion ->
 | 
				
			||||||
				giveup $ "This repository is already a initialized with version " ++ show (fromRepoVersion v) ++ ", not changing to requested version."
 | 
									giveup $ "This repository is already a initialized with version " ++ show (fromRepoVersion v) ++ ", not changing to requested version."
 | 
				
			||||||
			_ -> noop
 | 
								_ -> noop
 | 
				
			||||||
	initialize False
 | 
						initialize
 | 
				
			||||||
		(if null (initDesc os) then Nothing else Just (initDesc os))
 | 
							(if null (initDesc os) then Nothing else Just (initDesc os))
 | 
				
			||||||
		(initVersion os)
 | 
							(initVersion os)
 | 
				
			||||||
	unless (noAutoEnable os)
 | 
						unless (noAutoEnable os)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -35,6 +35,6 @@ perform s = do
 | 
				
			||||||
		then return $ toUUID s
 | 
							then return $ toUUID s
 | 
				
			||||||
		else Remote.nameToUUID s
 | 
							else Remote.nameToUUID s
 | 
				
			||||||
	storeUUID u
 | 
						storeUUID u
 | 
				
			||||||
	checkInitializeAllowed $ initialize' False Nothing
 | 
						checkInitializeAllowed $ initialize' Nothing
 | 
				
			||||||
	Annex.SpecialRemote.autoEnable
 | 
						Annex.SpecialRemote.autoEnable
 | 
				
			||||||
	next $ return True
 | 
						next $ return True
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -45,6 +45,6 @@ start (UpgradeOptions { autoOnly = True }) =
 | 
				
			||||||
start _ =
 | 
					start _ =
 | 
				
			||||||
	starting "upgrade" (ActionItemOther Nothing) (SeekInput []) $ do
 | 
						starting "upgrade" (ActionItemOther Nothing) (SeekInput []) $ do
 | 
				
			||||||
		whenM (isNothing <$> getVersion) $ do
 | 
							whenM (isNothing <$> getVersion) $ do
 | 
				
			||||||
			initialize False Nothing Nothing
 | 
								initialize Nothing Nothing
 | 
				
			||||||
		r <- upgrade False latestVersion
 | 
							r <- upgrade False latestVersion
 | 
				
			||||||
		next $ return r
 | 
							next $ return r
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5,4 +5,15 @@
 | 
				
			||||||
 content="""
 | 
					 content="""
 | 
				
			||||||
Implemented the two optimisations discussed above, and init in that
 | 
					Implemented the two optimisations discussed above, and init in that
 | 
				
			||||||
repository dropped from 24 seconds to 19 seconds, a 21% speedup.
 | 
					repository dropped from 24 seconds to 19 seconds, a 21% speedup.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					I think that's as fast as reconcileStaged is likely to get without
 | 
				
			||||||
 | 
					some deep optimisation of the persistent library.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Then I realized that `git-annex init` does not really need to scan for
 | 
				
			||||||
 | 
					associated files. That can be done later, when running a command that needs
 | 
				
			||||||
 | 
					to access the keys database. Indeed, when git-annex is used in a clone of
 | 
				
			||||||
 | 
					an annexed repo without explicitly running `git-annex init`, that's what
 | 
				
			||||||
 | 
					it already did. I've implemented that, so now `git-annex init` takes 3
 | 
				
			||||||
 | 
					seconds or so. The price will be paid later, the first time running a
 | 
				
			||||||
 | 
					`git-annex add` or `git-annex unlock` or `git-annex get`.
 | 
				
			||||||
"""]]
 | 
					"""]]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue