clean up git-remote-annex git-annex branch handling

Implemented alternateJournal, which git-remote-annex
uses to avoid any writes to the git-annex branch while setting up
a special remote from an annex:: url.

That prevents the remote.log from being overwritten with the special
remote configuration from the url, which might not be 100% the same as
the existing special remote configuration.

And it prevents an overwrite deleting of other stuff that was
already in the remote.log.

Also, when the branch was created by git-remote-annex, only delete it
at the end if nothing else has been written to it by another command.
This fixes the race condition described in
797f27ab05, where git-remote-annex
set up the branch and git-annex init and other commands were
run at the same time and their writes to the branch were lost.
This commit is contained in:
Joey Hess 2024-05-15 17:33:38 -04:00
parent d24d8870c5
commit adcebbae47
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 84 additions and 66 deletions

View file

@ -727,7 +727,8 @@ stageJournal :: JournalLocked -> Annex () -> Annex ()
stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do
prepareModifyIndex jl prepareModifyIndex jl
g <- gitRepo g <- gitRepo
let dir = gitAnnexJournalDir g st <- getState
let dir = gitAnnexJournalDir st g
(jlogf, jlogh) <- openjlog (fromRawFilePath tmpdir) (jlogf, jlogh) <- openjlog (fromRawFilePath tmpdir)
withHashObjectHandle $ \h -> withHashObjectHandle $ \h ->
withJournalHandle gitAnnexJournalDir $ \jh -> withJournalHandle gitAnnexJournalDir $ \jh ->

View file

@ -7,7 +7,7 @@
- All files in the journal must be a series of lines separated by - All files in the journal must be a series of lines separated by
- newlines. - newlines.
- -
- Copyright 2011-2022 Joey Hess <id@joeyh.name> - Copyright 2011-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -23,6 +23,8 @@ import qualified Git
import Annex.Perms import Annex.Perms
import Annex.Tmp import Annex.Tmp
import Annex.LockFile import Annex.LockFile
import Annex.BranchState
import Types.BranchState
import Utility.Directory.Stream import Utility.Directory.Stream
import qualified Utility.RawFilePath as R import qualified Utility.RawFilePath as R
@ -82,9 +84,10 @@ privateUUIDsKnown' = not . S.null . annexPrivateRepos . Annex.gitconfig
-} -}
setJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex () setJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex ()
setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do
st <- getState
jd <- fromRepo =<< ifM (regardingPrivateUUID ru) jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
( return gitAnnexPrivateJournalDir ( return (gitAnnexPrivateJournalDir st)
, return gitAnnexJournalDir , return (gitAnnexJournalDir st)
) )
-- journal file is written atomically -- journal file is written atomically
let jfile = journalFile file let jfile = journalFile file
@ -106,9 +109,10 @@ newtype AppendableJournalFile = AppendableJournalFile (RawFilePath, RawFilePath)
- branch. -} - branch. -}
checkCanAppendJournalFile :: JournalLocked -> RegardingUUID -> RawFilePath -> Annex (Maybe AppendableJournalFile) checkCanAppendJournalFile :: JournalLocked -> RegardingUUID -> RawFilePath -> Annex (Maybe AppendableJournalFile)
checkCanAppendJournalFile _jl ru file = do checkCanAppendJournalFile _jl ru file = do
st <- getState
jd <- fromRepo =<< ifM (regardingPrivateUUID ru) jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
( return gitAnnexPrivateJournalDir ( return (gitAnnexPrivateJournalDir st)
, return gitAnnexJournalDir , return (gitAnnexJournalDir st)
) )
let jfile = jd P.</> journalFile file let jfile = jd P.</> journalFile file
ifM (liftIO $ R.doesPathExist jfile) ifM (liftIO $ R.doesPathExist jfile)
@ -176,14 +180,12 @@ data GetPrivate = GetPrivate Bool
-} -}
getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent
getJournalFileStale (GetPrivate getprivate) file = do getJournalFileStale (GetPrivate getprivate) file = do
-- Optimisation to avoid a second MVar access.
st <- Annex.getState id st <- Annex.getState id
let g = Annex.repo st
liftIO $ liftIO $
if getprivate && privateUUIDsKnown' st if getprivate && privateUUIDsKnown' st
then do then do
x <- getfrom (gitAnnexJournalDir g) x <- getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st))
getfrom (gitAnnexPrivateJournalDir g) >>= \case getfrom (gitAnnexPrivateJournalDir (Annex.branchstate st) (Annex.repo st)) >>= \case
Nothing -> return $ case x of Nothing -> return $ case x of
Nothing -> NoJournalledContent Nothing -> NoJournalledContent
Just b -> JournalledContent b Just b -> JournalledContent b
@ -193,7 +195,7 @@ getJournalFileStale (GetPrivate getprivate) file = do
-- happens in a merge of two -- happens in a merge of two
-- git-annex branches. -- git-annex branches.
Just x' -> x' <> y Just x' -> x' <> y
else getfrom (gitAnnexJournalDir g) >>= return . \case else getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st)) >>= return . \case
Nothing -> NoJournalledContent Nothing -> NoJournalledContent
Just b -> JournalledContent b Just b -> JournalledContent b
where where
@ -219,18 +221,20 @@ discardIncompleteAppend v
{- List of existing journal files in a journal directory, but without locking, {- List of existing journal files in a journal directory, but without locking,
- may miss new ones just being added, or may have false positives if the - may miss new ones just being added, or may have false positives if the
- journal is staged as it is run. -} - journal is staged as it is run. -}
getJournalledFilesStale :: (Git.Repo -> RawFilePath) -> Annex [RawFilePath] getJournalledFilesStale :: (BranchState -> Git.Repo -> RawFilePath) -> Annex [RawFilePath]
getJournalledFilesStale getjournaldir = do getJournalledFilesStale getjournaldir = do
g <- gitRepo st <- Annex.getState id
fs <- liftIO $ catchDefaultIO [] $ let d = getjournaldir (Annex.branchstate st) (Annex.repo st)
getDirectoryContents $ fromRawFilePath (getjournaldir g) fs <- liftIO $ catchDefaultIO [] $
getDirectoryContents (fromRawFilePath d)
return $ filter (`notElem` [".", ".."]) $ return $ filter (`notElem` [".", ".."]) $
map (fileJournal . toRawFilePath) fs map (fileJournal . toRawFilePath) fs
{- Directory handle open on a journal directory. -} {- Directory handle open on a journal directory. -}
withJournalHandle :: (Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a withJournalHandle :: (BranchState -> Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a
withJournalHandle getjournaldir a = do withJournalHandle getjournaldir a = do
d <- fromRepo getjournaldir st <- Annex.getState id
let d = getjournaldir (Annex.branchstate st) (Annex.repo st)
bracket (opendir d) (liftIO . closeDirectory) (liftIO . a) bracket (opendir d) (liftIO . closeDirectory) (liftIO . a)
where where
-- avoid overhead of creating the journal directory when it already -- avoid overhead of creating the journal directory when it already
@ -239,9 +243,10 @@ withJournalHandle getjournaldir a = do
`catchIO` (const (createAnnexDirectory d >> opendir d)) `catchIO` (const (createAnnexDirectory d >> opendir d))
{- Checks if there are changes in the journal. -} {- Checks if there are changes in the journal. -}
journalDirty :: (Git.Repo -> RawFilePath) -> Annex Bool journalDirty :: (BranchState -> Git.Repo -> RawFilePath) -> Annex Bool
journalDirty getjournaldir = do journalDirty getjournaldir = do
d <- fromRawFilePath <$> fromRepo getjournaldir st <- getState
d <- fromRawFilePath <$> fromRepo (getjournaldir st)
liftIO $ liftIO $
(not <$> isDirectoryEmpty d) (not <$> isDirectoryEmpty d)
`catchIO` (const $ doesDirectoryExist d) `catchIO` (const $ doesDirectoryExist d)

View file

@ -118,6 +118,7 @@ import Key
import Types.UUID import Types.UUID
import Types.GitConfig import Types.GitConfig
import Types.Difference import Types.Difference
import Types.BranchState
import qualified Git import qualified Git
import qualified Git.Types as Git import qualified Git.Types as Git
import Git.FilePath import Git.FilePath
@ -528,15 +529,19 @@ gitAnnexTransferDir r =
{- .git/annex/journal/ is used to journal changes made to the git-annex {- .git/annex/journal/ is used to journal changes made to the git-annex
- branch -} - branch -}
gitAnnexJournalDir :: Git.Repo -> RawFilePath gitAnnexJournalDir :: BranchState -> Git.Repo -> RawFilePath
gitAnnexJournalDir r = gitAnnexJournalDir st r = P.addTrailingPathSeparator $
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "journal" case alternateJournal st of
Nothing -> gitAnnexDir r P.</> "journal"
Just d -> d
{- .git/annex/journal.private/ is used to journal changes regarding private {- .git/annex/journal.private/ is used to journal changes regarding private
- repositories. -} - repositories. -}
gitAnnexPrivateJournalDir :: Git.Repo -> RawFilePath gitAnnexPrivateJournalDir :: BranchState -> Git.Repo -> RawFilePath
gitAnnexPrivateJournalDir r = gitAnnexPrivateJournalDir st r = P.addTrailingPathSeparator $
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "journal-private" case alternateJournal st of
Nothing -> gitAnnexDir r P.</> "journal-private"
Just d -> d
{- Lock file for the journal. -} {- Lock file for the journal. -}
gitAnnexJournalLock :: Git.Repo -> RawFilePath gitAnnexJournalLock :: Git.Repo -> RawFilePath

View file

@ -21,6 +21,7 @@ import qualified Git.Remote
import qualified Git.Remote.Remove import qualified Git.Remote.Remove
import qualified Annex.SpecialRemote as SpecialRemote import qualified Annex.SpecialRemote as SpecialRemote
import qualified Annex.Branch import qualified Annex.Branch
import qualified Annex.BranchState
import qualified Types.Remote as Remote import qualified Types.Remote as Remote
import qualified Logs.Remote import qualified Logs.Remote
import Remote.Helper.Encryptable (parseEncryptionMethod) import Remote.Helper.Encryptable (parseEncryptionMethod)
@ -32,6 +33,7 @@ import Types.RemoteConfig
import Types.ProposedAccepted import Types.ProposedAccepted
import Types.Export import Types.Export
import Types.GitConfig import Types.GitConfig
import Types.BranchState
import Types.Difference import Types.Difference
import Types.Crypto import Types.Crypto
import Git.Types import Git.Types
@ -44,6 +46,7 @@ import Annex.SpecialRemote.Config
import Remote.List import Remote.List
import Remote.List.Util import Remote.List.Util
import Utility.Tmp import Utility.Tmp
import Utility.Tmp.Dir
import Utility.Env import Utility.Env
import Utility.Metered import Utility.Metered
@ -485,10 +488,9 @@ withSpecialRemote cfg@(SpecialRemoteConfig {}) sab a = case specialRemoteName cf
| otherwise -> giveup $ "The uuid in the annex:: url does not match the uuid of the remote named " ++ remotename | otherwise -> giveup $ "The uuid in the annex:: url does not match the uuid of the remote named " ++ remotename
-- When cloning from an annex:: url, -- When cloning from an annex:: url,
-- this is used to set up the origin remote. -- this is used to set up the origin remote.
Nothing -> (initremote remotename >>= a) Nothing -> specialRemoteFromUrl sab
`finally` cleanupInitialization sab (initremote remotename >>= a)
Nothing -> inittempremote Nothing -> specialRemoteFromUrl sab inittempremote
`finally` cleanupInitialization sab
where where
-- Initialize a new special remote with the provided configuration -- Initialize a new special remote with the provided configuration
-- and name. -- and name.
@ -869,27 +871,48 @@ getRepo = getEnv "GIT_WORK_TREE" >>= \case
-- Records what the git-annex branch was at the beginning of this command. -- Records what the git-annex branch was at the beginning of this command.
data StartAnnexBranch data StartAnnexBranch
= AnnexBranchExistedAlready Ref = AnnexBranchExistedAlready Sha
| AnnexBranchCreatedEmpty Ref | AnnexBranchCreatedEmpty Sha
{- Run early in the command, gets the initial state of the git-annex
- branch.
-
- If the branch does not exist yet, it's created here. This is done
- because it's hard to avoid the branch being created by this command,
- so tracking the sha of the created branch allows cleaning it up later.
-}
startAnnexBranch :: Annex StartAnnexBranch startAnnexBranch :: Annex StartAnnexBranch
startAnnexBranch = ifM (null <$> Annex.Branch.siblingBranches) startAnnexBranch = ifM (null <$> Annex.Branch.siblingBranches)
( AnnexBranchCreatedEmpty <$> Annex.Branch.getBranch ( AnnexBranchCreatedEmpty <$> Annex.Branch.getBranch
, AnnexBranchExistedAlready <$> Annex.Branch.getBranch , AnnexBranchExistedAlready <$> Annex.Branch.getBranch
) )
-- This is run after git has used this process to fetch or push from a -- This runs an action that will set up a special remote that
-- special remote that was specified using a git-annex url. If the git -- was specified using an annex url.
-- repository was not initialized for use by git-annex already, it is still
-- not initialized at this point.
-- --
-- Setting up a special remote needs to write its config to the git-annex
-- branch. And using a special remote may also write to the branch.
-- But in this case, writes to the git-annex branch need to be avoided,
-- so that cleanupInitialization can leave things in the right state.
--
-- So this prevents commits to the git-annex branch, and redirects all
-- journal writes to a temporary directory, so that all writes
-- to the git-annex branch by the action will be discarded.
specialRemoteFromUrl :: StartAnnexBranch -> Annex a -> Annex a
specialRemoteFromUrl sab a = withTmpDir "journal" $ \tmpdir -> do
Annex.overrideGitConfig $ \c ->
c { annexAlwaysCommit = False }
Annex.BranchState.changeState $ \st ->
st { alternateJournal = Just (toRawFilePath tmpdir) }
a `finally` cleanupInitialization sab
-- If the git-annex branch did not exist when this command started, -- If the git-annex branch did not exist when this command started,
-- the current contents of it were created in passing by this command, -- it was created empty by this command, and this command has avoided
-- which is hard to avoid. But if a git-annex branch is fetched from the -- making any other commits to it. If nothing else has written to the
-- special remote and contains Differences, it would not be possible to -- branch while this command was running, the branch will be deleted.
-- merge it into the git-annex branch that was created while running this -- That allows for the git-annex branch that is fetched from the special
-- command. To avoid that problem, when the git-annex branch was created -- remote to contain Differences, which would prevent it from being merged
-- at the start of this command, it's deleted. -- with the git-annex branch created by this command.
-- --
-- If there is still not a sibling git-annex branch, this deletes all annex -- If there is still not a sibling git-annex branch, this deletes all annex
-- objects for git bundles from the annex objects directory, and deletes -- objects for git bundles from the annex objects directory, and deletes
@ -905,10 +928,11 @@ cleanupInitialization :: StartAnnexBranch -> Annex ()
cleanupInitialization sab = do cleanupInitialization sab = do
case sab of case sab of
AnnexBranchExistedAlready _ -> noop AnnexBranchExistedAlready _ -> noop
AnnexBranchCreatedEmpty _ -> do AnnexBranchCreatedEmpty r ->
inRepo $ Git.Branch.delete Annex.Branch.fullname whenM ((r ==) <$> Annex.Branch.getBranch) $ do
indexfile <- fromRepo gitAnnexIndex inRepo $ Git.Branch.delete Annex.Branch.fullname
liftIO $ removeWhenExistsWith R.removeLink indexfile indexfile <- fromRepo gitAnnexIndex
liftIO $ removeWhenExistsWith R.removeLink indexfile
ifM Annex.Branch.hasSibling ifM Annex.Branch.hasSibling
( do ( do
autoInitialize' (pure True) remoteList autoInitialize' (pure True) remoteList

View file

@ -36,7 +36,10 @@ data BranchState = BranchState
-- process need to be noticed while the current process is running? -- process need to be noticed while the current process is running?
-- (This makes the journal always be read, and avoids using the -- (This makes the journal always be read, and avoids using the
-- cache.) -- cache.)
, alternateJournal :: Maybe RawFilePath
-- ^ use this directory for all journals, rather than the
-- gitAnnexJournalDir and gitAnnexPrivateJournalDir.
} }
startBranchState :: BranchState startBranchState :: BranchState
startBranchState = BranchState False False False [] [] [] False startBranchState = BranchState False False False [] [] [] False Nothing

View file

@ -10,26 +10,6 @@ will be available to users who don't use datalad.
This is implememented and working. Remaining todo list for it: This is implememented and working. Remaining todo list for it:
* Cloning writes the new special remote config into remote.log,
and *deletes* other special remote configs.
The remote config from the url may be slightly different as well
than the existing one. Cloning should not write it.
* The race condition described in
[[!commit 797f27ab0517e0021363791ff269300f2ba095a5]]
where before git-annex init is run in a repo,
using git-remote-annex and at the same time git-annex init can lose
changes that the latter command (and ones after it) write to the
git-annex branch.
This should be fixable by making git-remote-annex not write to the
git-annex branch, but to eg, a temporary journal directory.
Also, when the remote uses importtree=yes, pushing to it updates
content identifiers, which currently get recorded in the git-annex
branch. It would be good to avoid that being written as well.
* Test incremental push edge cases involving checkprereq. * Test incremental push edge cases involving checkprereq.
* Cloning from an annex:: url with importtree=yes doesn't work * Cloning from an annex:: url with importtree=yes doesn't work