diff --git a/Annex/Branch.hs b/Annex/Branch.hs index bcc9ae114d..717cbc0400 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -727,7 +727,8 @@ stageJournal :: JournalLocked -> Annex () -> Annex () stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do prepareModifyIndex jl g <- gitRepo - let dir = gitAnnexJournalDir g + st <- getState + let dir = gitAnnexJournalDir st g (jlogf, jlogh) <- openjlog (fromRawFilePath tmpdir) withHashObjectHandle $ \h -> withJournalHandle gitAnnexJournalDir $ \jh -> diff --git a/Annex/Journal.hs b/Annex/Journal.hs index ea6327606d..54dd3317ef 100644 --- a/Annex/Journal.hs +++ b/Annex/Journal.hs @@ -7,7 +7,7 @@ - All files in the journal must be a series of lines separated by - newlines. - - - Copyright 2011-2022 Joey Hess + - Copyright 2011-2024 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -23,6 +23,8 @@ import qualified Git import Annex.Perms import Annex.Tmp import Annex.LockFile +import Annex.BranchState +import Types.BranchState import Utility.Directory.Stream import qualified Utility.RawFilePath as R @@ -82,9 +84,10 @@ privateUUIDsKnown' = not . S.null . annexPrivateRepos . Annex.gitconfig -} setJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex () setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do + st <- getState jd <- fromRepo =<< ifM (regardingPrivateUUID ru) - ( return gitAnnexPrivateJournalDir - , return gitAnnexJournalDir + ( return (gitAnnexPrivateJournalDir st) + , return (gitAnnexJournalDir st) ) -- journal file is written atomically let jfile = journalFile file @@ -106,9 +109,10 @@ newtype AppendableJournalFile = AppendableJournalFile (RawFilePath, RawFilePath) - branch. -} checkCanAppendJournalFile :: JournalLocked -> RegardingUUID -> RawFilePath -> Annex (Maybe AppendableJournalFile) checkCanAppendJournalFile _jl ru file = do + st <- getState jd <- fromRepo =<< ifM (regardingPrivateUUID ru) - ( return gitAnnexPrivateJournalDir - , return gitAnnexJournalDir + ( return (gitAnnexPrivateJournalDir st) + , return (gitAnnexJournalDir st) ) let jfile = jd P. journalFile file ifM (liftIO $ R.doesPathExist jfile) @@ -176,14 +180,12 @@ data GetPrivate = GetPrivate Bool -} getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent getJournalFileStale (GetPrivate getprivate) file = do - -- Optimisation to avoid a second MVar access. st <- Annex.getState id - let g = Annex.repo st liftIO $ if getprivate && privateUUIDsKnown' st then do - x <- getfrom (gitAnnexJournalDir g) - getfrom (gitAnnexPrivateJournalDir g) >>= \case + x <- getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st)) + getfrom (gitAnnexPrivateJournalDir (Annex.branchstate st) (Annex.repo st)) >>= \case Nothing -> return $ case x of Nothing -> NoJournalledContent Just b -> JournalledContent b @@ -193,7 +195,7 @@ getJournalFileStale (GetPrivate getprivate) file = do -- happens in a merge of two -- git-annex branches. Just x' -> x' <> y - else getfrom (gitAnnexJournalDir g) >>= return . \case + else getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st)) >>= return . \case Nothing -> NoJournalledContent Just b -> JournalledContent b where @@ -219,18 +221,20 @@ discardIncompleteAppend v {- List of existing journal files in a journal directory, but without locking, - may miss new ones just being added, or may have false positives if the - journal is staged as it is run. -} -getJournalledFilesStale :: (Git.Repo -> RawFilePath) -> Annex [RawFilePath] +getJournalledFilesStale :: (BranchState -> Git.Repo -> RawFilePath) -> Annex [RawFilePath] getJournalledFilesStale getjournaldir = do - g <- gitRepo - fs <- liftIO $ catchDefaultIO [] $ - getDirectoryContents $ fromRawFilePath (getjournaldir g) + st <- Annex.getState id + let d = getjournaldir (Annex.branchstate st) (Annex.repo st) + fs <- liftIO $ catchDefaultIO [] $ + getDirectoryContents (fromRawFilePath d) return $ filter (`notElem` [".", ".."]) $ map (fileJournal . toRawFilePath) fs {- Directory handle open on a journal directory. -} -withJournalHandle :: (Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a +withJournalHandle :: (BranchState -> Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a withJournalHandle getjournaldir a = do - d <- fromRepo getjournaldir + st <- Annex.getState id + let d = getjournaldir (Annex.branchstate st) (Annex.repo st) bracket (opendir d) (liftIO . closeDirectory) (liftIO . a) where -- avoid overhead of creating the journal directory when it already @@ -239,9 +243,10 @@ withJournalHandle getjournaldir a = do `catchIO` (const (createAnnexDirectory d >> opendir d)) {- Checks if there are changes in the journal. -} -journalDirty :: (Git.Repo -> RawFilePath) -> Annex Bool +journalDirty :: (BranchState -> Git.Repo -> RawFilePath) -> Annex Bool journalDirty getjournaldir = do - d <- fromRawFilePath <$> fromRepo getjournaldir + st <- getState + d <- fromRawFilePath <$> fromRepo (getjournaldir st) liftIO $ (not <$> isDirectoryEmpty d) `catchIO` (const $ doesDirectoryExist d) diff --git a/Annex/Locations.hs b/Annex/Locations.hs index 9b465dce8d..ee5b6d690f 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -118,6 +118,7 @@ import Key import Types.UUID import Types.GitConfig import Types.Difference +import Types.BranchState import qualified Git import qualified Git.Types as Git import Git.FilePath @@ -528,15 +529,19 @@ gitAnnexTransferDir r = {- .git/annex/journal/ is used to journal changes made to the git-annex - branch -} -gitAnnexJournalDir :: Git.Repo -> RawFilePath -gitAnnexJournalDir r = - P.addTrailingPathSeparator $ gitAnnexDir r P. "journal" +gitAnnexJournalDir :: BranchState -> Git.Repo -> RawFilePath +gitAnnexJournalDir st r = P.addTrailingPathSeparator $ + case alternateJournal st of + Nothing -> gitAnnexDir r P. "journal" + Just d -> d {- .git/annex/journal.private/ is used to journal changes regarding private - repositories. -} -gitAnnexPrivateJournalDir :: Git.Repo -> RawFilePath -gitAnnexPrivateJournalDir r = - P.addTrailingPathSeparator $ gitAnnexDir r P. "journal-private" +gitAnnexPrivateJournalDir :: BranchState -> Git.Repo -> RawFilePath +gitAnnexPrivateJournalDir st r = P.addTrailingPathSeparator $ + case alternateJournal st of + Nothing -> gitAnnexDir r P. "journal-private" + Just d -> d {- Lock file for the journal. -} gitAnnexJournalLock :: Git.Repo -> RawFilePath diff --git a/CmdLine/GitRemoteAnnex.hs b/CmdLine/GitRemoteAnnex.hs index 0af908c5b7..d1eac6dfd8 100644 --- a/CmdLine/GitRemoteAnnex.hs +++ b/CmdLine/GitRemoteAnnex.hs @@ -21,6 +21,7 @@ import qualified Git.Remote import qualified Git.Remote.Remove import qualified Annex.SpecialRemote as SpecialRemote import qualified Annex.Branch +import qualified Annex.BranchState import qualified Types.Remote as Remote import qualified Logs.Remote import Remote.Helper.Encryptable (parseEncryptionMethod) @@ -32,6 +33,7 @@ import Types.RemoteConfig import Types.ProposedAccepted import Types.Export import Types.GitConfig +import Types.BranchState import Types.Difference import Types.Crypto import Git.Types @@ -44,6 +46,7 @@ import Annex.SpecialRemote.Config import Remote.List import Remote.List.Util import Utility.Tmp +import Utility.Tmp.Dir import Utility.Env import Utility.Metered @@ -485,10 +488,9 @@ withSpecialRemote cfg@(SpecialRemoteConfig {}) sab a = case specialRemoteName cf | otherwise -> giveup $ "The uuid in the annex:: url does not match the uuid of the remote named " ++ remotename -- When cloning from an annex:: url, -- this is used to set up the origin remote. - Nothing -> (initremote remotename >>= a) - `finally` cleanupInitialization sab - Nothing -> inittempremote - `finally` cleanupInitialization sab + Nothing -> specialRemoteFromUrl sab + (initremote remotename >>= a) + Nothing -> specialRemoteFromUrl sab inittempremote where -- Initialize a new special remote with the provided configuration -- and name. @@ -869,27 +871,48 @@ getRepo = getEnv "GIT_WORK_TREE" >>= \case -- Records what the git-annex branch was at the beginning of this command. data StartAnnexBranch - = AnnexBranchExistedAlready Ref - | AnnexBranchCreatedEmpty Ref + = AnnexBranchExistedAlready Sha + | AnnexBranchCreatedEmpty Sha +{- Run early in the command, gets the initial state of the git-annex + - branch. + - + - If the branch does not exist yet, it's created here. This is done + - because it's hard to avoid the branch being created by this command, + - so tracking the sha of the created branch allows cleaning it up later. + -} startAnnexBranch :: Annex StartAnnexBranch startAnnexBranch = ifM (null <$> Annex.Branch.siblingBranches) ( AnnexBranchCreatedEmpty <$> Annex.Branch.getBranch , AnnexBranchExistedAlready <$> Annex.Branch.getBranch ) --- This is run after git has used this process to fetch or push from a --- special remote that was specified using a git-annex url. If the git --- repository was not initialized for use by git-annex already, it is still --- not initialized at this point. +-- This runs an action that will set up a special remote that +-- was specified using an annex url. -- +-- Setting up a special remote needs to write its config to the git-annex +-- branch. And using a special remote may also write to the branch. +-- But in this case, writes to the git-annex branch need to be avoided, +-- so that cleanupInitialization can leave things in the right state. +-- +-- So this prevents commits to the git-annex branch, and redirects all +-- journal writes to a temporary directory, so that all writes +-- to the git-annex branch by the action will be discarded. +specialRemoteFromUrl :: StartAnnexBranch -> Annex a -> Annex a +specialRemoteFromUrl sab a = withTmpDir "journal" $ \tmpdir -> do + Annex.overrideGitConfig $ \c -> + c { annexAlwaysCommit = False } + Annex.BranchState.changeState $ \st -> + st { alternateJournal = Just (toRawFilePath tmpdir) } + a `finally` cleanupInitialization sab + -- If the git-annex branch did not exist when this command started, --- the current contents of it were created in passing by this command, --- which is hard to avoid. But if a git-annex branch is fetched from the --- special remote and contains Differences, it would not be possible to --- merge it into the git-annex branch that was created while running this --- command. To avoid that problem, when the git-annex branch was created --- at the start of this command, it's deleted. +-- it was created empty by this command, and this command has avoided +-- making any other commits to it. If nothing else has written to the +-- branch while this command was running, the branch will be deleted. +-- That allows for the git-annex branch that is fetched from the special +-- remote to contain Differences, which would prevent it from being merged +-- with the git-annex branch created by this command. -- -- If there is still not a sibling git-annex branch, this deletes all annex -- objects for git bundles from the annex objects directory, and deletes @@ -905,10 +928,11 @@ cleanupInitialization :: StartAnnexBranch -> Annex () cleanupInitialization sab = do case sab of AnnexBranchExistedAlready _ -> noop - AnnexBranchCreatedEmpty _ -> do - inRepo $ Git.Branch.delete Annex.Branch.fullname - indexfile <- fromRepo gitAnnexIndex - liftIO $ removeWhenExistsWith R.removeLink indexfile + AnnexBranchCreatedEmpty r -> + whenM ((r ==) <$> Annex.Branch.getBranch) $ do + inRepo $ Git.Branch.delete Annex.Branch.fullname + indexfile <- fromRepo gitAnnexIndex + liftIO $ removeWhenExistsWith R.removeLink indexfile ifM Annex.Branch.hasSibling ( do autoInitialize' (pure True) remoteList diff --git a/Types/BranchState.hs b/Types/BranchState.hs index 129a17b349..d79a1c70a6 100644 --- a/Types/BranchState.hs +++ b/Types/BranchState.hs @@ -36,7 +36,10 @@ data BranchState = BranchState -- process need to be noticed while the current process is running? -- (This makes the journal always be read, and avoids using the -- cache.) + , alternateJournal :: Maybe RawFilePath + -- ^ use this directory for all journals, rather than the + -- gitAnnexJournalDir and gitAnnexPrivateJournalDir. } startBranchState :: BranchState -startBranchState = BranchState False False False [] [] [] False +startBranchState = BranchState False False False [] [] [] False Nothing diff --git a/doc/todo/git-remote-annex.mdwn b/doc/todo/git-remote-annex.mdwn index 36b3cf9155..3608360983 100644 --- a/doc/todo/git-remote-annex.mdwn +++ b/doc/todo/git-remote-annex.mdwn @@ -10,26 +10,6 @@ will be available to users who don't use datalad. This is implememented and working. Remaining todo list for it: -* Cloning writes the new special remote config into remote.log, - and *deletes* other special remote configs. - - The remote config from the url may be slightly different as well - than the existing one. Cloning should not write it. - -* The race condition described in - [[!commit 797f27ab0517e0021363791ff269300f2ba095a5]] - where before git-annex init is run in a repo, - using git-remote-annex and at the same time git-annex init can lose - changes that the latter command (and ones after it) write to the - git-annex branch. - - This should be fixable by making git-remote-annex not write to the - git-annex branch, but to eg, a temporary journal directory. - - Also, when the remote uses importtree=yes, pushing to it updates - content identifiers, which currently get recorded in the git-annex - branch. It would be good to avoid that being written as well. - * Test incremental push edge cases involving checkprereq. * Cloning from an annex:: url with importtree=yes doesn't work