clean up git-remote-annex git-annex branch handling
Implemented alternateJournal, which git-remote-annex
uses to avoid any writes to the git-annex branch while setting up
a special remote from an annex:: url.
That prevents the remote.log from being overwritten with the special
remote configuration from the url, which might not be 100% the same as
the existing special remote configuration.
And it prevents an overwrite deleting of other stuff that was
already in the remote.log.
Also, when the branch was created by git-remote-annex, only delete it
at the end if nothing else has been written to it by another command.
This fixes the race condition described in
797f27ab05
, where git-remote-annex
set up the branch and git-annex init and other commands were
run at the same time and their writes to the branch were lost.
This commit is contained in:
parent
d24d8870c5
commit
adcebbae47
6 changed files with 84 additions and 66 deletions
|
@ -727,7 +727,8 @@ stageJournal :: JournalLocked -> Annex () -> Annex ()
|
||||||
stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do
|
stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do
|
||||||
prepareModifyIndex jl
|
prepareModifyIndex jl
|
||||||
g <- gitRepo
|
g <- gitRepo
|
||||||
let dir = gitAnnexJournalDir g
|
st <- getState
|
||||||
|
let dir = gitAnnexJournalDir st g
|
||||||
(jlogf, jlogh) <- openjlog (fromRawFilePath tmpdir)
|
(jlogf, jlogh) <- openjlog (fromRawFilePath tmpdir)
|
||||||
withHashObjectHandle $ \h ->
|
withHashObjectHandle $ \h ->
|
||||||
withJournalHandle gitAnnexJournalDir $ \jh ->
|
withJournalHandle gitAnnexJournalDir $ \jh ->
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
- All files in the journal must be a series of lines separated by
|
- All files in the journal must be a series of lines separated by
|
||||||
- newlines.
|
- newlines.
|
||||||
-
|
-
|
||||||
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
|
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -23,6 +23,8 @@ import qualified Git
|
||||||
import Annex.Perms
|
import Annex.Perms
|
||||||
import Annex.Tmp
|
import Annex.Tmp
|
||||||
import Annex.LockFile
|
import Annex.LockFile
|
||||||
|
import Annex.BranchState
|
||||||
|
import Types.BranchState
|
||||||
import Utility.Directory.Stream
|
import Utility.Directory.Stream
|
||||||
import qualified Utility.RawFilePath as R
|
import qualified Utility.RawFilePath as R
|
||||||
|
|
||||||
|
@ -82,9 +84,10 @@ privateUUIDsKnown' = not . S.null . annexPrivateRepos . Annex.gitconfig
|
||||||
-}
|
-}
|
||||||
setJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex ()
|
setJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex ()
|
||||||
setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do
|
setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do
|
||||||
|
st <- getState
|
||||||
jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
|
jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
|
||||||
( return gitAnnexPrivateJournalDir
|
( return (gitAnnexPrivateJournalDir st)
|
||||||
, return gitAnnexJournalDir
|
, return (gitAnnexJournalDir st)
|
||||||
)
|
)
|
||||||
-- journal file is written atomically
|
-- journal file is written atomically
|
||||||
let jfile = journalFile file
|
let jfile = journalFile file
|
||||||
|
@ -106,9 +109,10 @@ newtype AppendableJournalFile = AppendableJournalFile (RawFilePath, RawFilePath)
|
||||||
- branch. -}
|
- branch. -}
|
||||||
checkCanAppendJournalFile :: JournalLocked -> RegardingUUID -> RawFilePath -> Annex (Maybe AppendableJournalFile)
|
checkCanAppendJournalFile :: JournalLocked -> RegardingUUID -> RawFilePath -> Annex (Maybe AppendableJournalFile)
|
||||||
checkCanAppendJournalFile _jl ru file = do
|
checkCanAppendJournalFile _jl ru file = do
|
||||||
|
st <- getState
|
||||||
jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
|
jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
|
||||||
( return gitAnnexPrivateJournalDir
|
( return (gitAnnexPrivateJournalDir st)
|
||||||
, return gitAnnexJournalDir
|
, return (gitAnnexJournalDir st)
|
||||||
)
|
)
|
||||||
let jfile = jd P.</> journalFile file
|
let jfile = jd P.</> journalFile file
|
||||||
ifM (liftIO $ R.doesPathExist jfile)
|
ifM (liftIO $ R.doesPathExist jfile)
|
||||||
|
@ -176,14 +180,12 @@ data GetPrivate = GetPrivate Bool
|
||||||
-}
|
-}
|
||||||
getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent
|
getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent
|
||||||
getJournalFileStale (GetPrivate getprivate) file = do
|
getJournalFileStale (GetPrivate getprivate) file = do
|
||||||
-- Optimisation to avoid a second MVar access.
|
|
||||||
st <- Annex.getState id
|
st <- Annex.getState id
|
||||||
let g = Annex.repo st
|
|
||||||
liftIO $
|
liftIO $
|
||||||
if getprivate && privateUUIDsKnown' st
|
if getprivate && privateUUIDsKnown' st
|
||||||
then do
|
then do
|
||||||
x <- getfrom (gitAnnexJournalDir g)
|
x <- getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st))
|
||||||
getfrom (gitAnnexPrivateJournalDir g) >>= \case
|
getfrom (gitAnnexPrivateJournalDir (Annex.branchstate st) (Annex.repo st)) >>= \case
|
||||||
Nothing -> return $ case x of
|
Nothing -> return $ case x of
|
||||||
Nothing -> NoJournalledContent
|
Nothing -> NoJournalledContent
|
||||||
Just b -> JournalledContent b
|
Just b -> JournalledContent b
|
||||||
|
@ -193,7 +195,7 @@ getJournalFileStale (GetPrivate getprivate) file = do
|
||||||
-- happens in a merge of two
|
-- happens in a merge of two
|
||||||
-- git-annex branches.
|
-- git-annex branches.
|
||||||
Just x' -> x' <> y
|
Just x' -> x' <> y
|
||||||
else getfrom (gitAnnexJournalDir g) >>= return . \case
|
else getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st)) >>= return . \case
|
||||||
Nothing -> NoJournalledContent
|
Nothing -> NoJournalledContent
|
||||||
Just b -> JournalledContent b
|
Just b -> JournalledContent b
|
||||||
where
|
where
|
||||||
|
@ -219,18 +221,20 @@ discardIncompleteAppend v
|
||||||
{- List of existing journal files in a journal directory, but without locking,
|
{- List of existing journal files in a journal directory, but without locking,
|
||||||
- may miss new ones just being added, or may have false positives if the
|
- may miss new ones just being added, or may have false positives if the
|
||||||
- journal is staged as it is run. -}
|
- journal is staged as it is run. -}
|
||||||
getJournalledFilesStale :: (Git.Repo -> RawFilePath) -> Annex [RawFilePath]
|
getJournalledFilesStale :: (BranchState -> Git.Repo -> RawFilePath) -> Annex [RawFilePath]
|
||||||
getJournalledFilesStale getjournaldir = do
|
getJournalledFilesStale getjournaldir = do
|
||||||
g <- gitRepo
|
st <- Annex.getState id
|
||||||
|
let d = getjournaldir (Annex.branchstate st) (Annex.repo st)
|
||||||
fs <- liftIO $ catchDefaultIO [] $
|
fs <- liftIO $ catchDefaultIO [] $
|
||||||
getDirectoryContents $ fromRawFilePath (getjournaldir g)
|
getDirectoryContents (fromRawFilePath d)
|
||||||
return $ filter (`notElem` [".", ".."]) $
|
return $ filter (`notElem` [".", ".."]) $
|
||||||
map (fileJournal . toRawFilePath) fs
|
map (fileJournal . toRawFilePath) fs
|
||||||
|
|
||||||
{- Directory handle open on a journal directory. -}
|
{- Directory handle open on a journal directory. -}
|
||||||
withJournalHandle :: (Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a
|
withJournalHandle :: (BranchState -> Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a
|
||||||
withJournalHandle getjournaldir a = do
|
withJournalHandle getjournaldir a = do
|
||||||
d <- fromRepo getjournaldir
|
st <- Annex.getState id
|
||||||
|
let d = getjournaldir (Annex.branchstate st) (Annex.repo st)
|
||||||
bracket (opendir d) (liftIO . closeDirectory) (liftIO . a)
|
bracket (opendir d) (liftIO . closeDirectory) (liftIO . a)
|
||||||
where
|
where
|
||||||
-- avoid overhead of creating the journal directory when it already
|
-- avoid overhead of creating the journal directory when it already
|
||||||
|
@ -239,9 +243,10 @@ withJournalHandle getjournaldir a = do
|
||||||
`catchIO` (const (createAnnexDirectory d >> opendir d))
|
`catchIO` (const (createAnnexDirectory d >> opendir d))
|
||||||
|
|
||||||
{- Checks if there are changes in the journal. -}
|
{- Checks if there are changes in the journal. -}
|
||||||
journalDirty :: (Git.Repo -> RawFilePath) -> Annex Bool
|
journalDirty :: (BranchState -> Git.Repo -> RawFilePath) -> Annex Bool
|
||||||
journalDirty getjournaldir = do
|
journalDirty getjournaldir = do
|
||||||
d <- fromRawFilePath <$> fromRepo getjournaldir
|
st <- getState
|
||||||
|
d <- fromRawFilePath <$> fromRepo (getjournaldir st)
|
||||||
liftIO $
|
liftIO $
|
||||||
(not <$> isDirectoryEmpty d)
|
(not <$> isDirectoryEmpty d)
|
||||||
`catchIO` (const $ doesDirectoryExist d)
|
`catchIO` (const $ doesDirectoryExist d)
|
||||||
|
|
|
@ -118,6 +118,7 @@ import Key
|
||||||
import Types.UUID
|
import Types.UUID
|
||||||
import Types.GitConfig
|
import Types.GitConfig
|
||||||
import Types.Difference
|
import Types.Difference
|
||||||
|
import Types.BranchState
|
||||||
import qualified Git
|
import qualified Git
|
||||||
import qualified Git.Types as Git
|
import qualified Git.Types as Git
|
||||||
import Git.FilePath
|
import Git.FilePath
|
||||||
|
@ -528,15 +529,19 @@ gitAnnexTransferDir r =
|
||||||
|
|
||||||
{- .git/annex/journal/ is used to journal changes made to the git-annex
|
{- .git/annex/journal/ is used to journal changes made to the git-annex
|
||||||
- branch -}
|
- branch -}
|
||||||
gitAnnexJournalDir :: Git.Repo -> RawFilePath
|
gitAnnexJournalDir :: BranchState -> Git.Repo -> RawFilePath
|
||||||
gitAnnexJournalDir r =
|
gitAnnexJournalDir st r = P.addTrailingPathSeparator $
|
||||||
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "journal"
|
case alternateJournal st of
|
||||||
|
Nothing -> gitAnnexDir r P.</> "journal"
|
||||||
|
Just d -> d
|
||||||
|
|
||||||
{- .git/annex/journal.private/ is used to journal changes regarding private
|
{- .git/annex/journal.private/ is used to journal changes regarding private
|
||||||
- repositories. -}
|
- repositories. -}
|
||||||
gitAnnexPrivateJournalDir :: Git.Repo -> RawFilePath
|
gitAnnexPrivateJournalDir :: BranchState -> Git.Repo -> RawFilePath
|
||||||
gitAnnexPrivateJournalDir r =
|
gitAnnexPrivateJournalDir st r = P.addTrailingPathSeparator $
|
||||||
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "journal-private"
|
case alternateJournal st of
|
||||||
|
Nothing -> gitAnnexDir r P.</> "journal-private"
|
||||||
|
Just d -> d
|
||||||
|
|
||||||
{- Lock file for the journal. -}
|
{- Lock file for the journal. -}
|
||||||
gitAnnexJournalLock :: Git.Repo -> RawFilePath
|
gitAnnexJournalLock :: Git.Repo -> RawFilePath
|
||||||
|
|
|
@ -21,6 +21,7 @@ import qualified Git.Remote
|
||||||
import qualified Git.Remote.Remove
|
import qualified Git.Remote.Remove
|
||||||
import qualified Annex.SpecialRemote as SpecialRemote
|
import qualified Annex.SpecialRemote as SpecialRemote
|
||||||
import qualified Annex.Branch
|
import qualified Annex.Branch
|
||||||
|
import qualified Annex.BranchState
|
||||||
import qualified Types.Remote as Remote
|
import qualified Types.Remote as Remote
|
||||||
import qualified Logs.Remote
|
import qualified Logs.Remote
|
||||||
import Remote.Helper.Encryptable (parseEncryptionMethod)
|
import Remote.Helper.Encryptable (parseEncryptionMethod)
|
||||||
|
@ -32,6 +33,7 @@ import Types.RemoteConfig
|
||||||
import Types.ProposedAccepted
|
import Types.ProposedAccepted
|
||||||
import Types.Export
|
import Types.Export
|
||||||
import Types.GitConfig
|
import Types.GitConfig
|
||||||
|
import Types.BranchState
|
||||||
import Types.Difference
|
import Types.Difference
|
||||||
import Types.Crypto
|
import Types.Crypto
|
||||||
import Git.Types
|
import Git.Types
|
||||||
|
@ -44,6 +46,7 @@ import Annex.SpecialRemote.Config
|
||||||
import Remote.List
|
import Remote.List
|
||||||
import Remote.List.Util
|
import Remote.List.Util
|
||||||
import Utility.Tmp
|
import Utility.Tmp
|
||||||
|
import Utility.Tmp.Dir
|
||||||
import Utility.Env
|
import Utility.Env
|
||||||
import Utility.Metered
|
import Utility.Metered
|
||||||
|
|
||||||
|
@ -485,10 +488,9 @@ withSpecialRemote cfg@(SpecialRemoteConfig {}) sab a = case specialRemoteName cf
|
||||||
| otherwise -> giveup $ "The uuid in the annex:: url does not match the uuid of the remote named " ++ remotename
|
| otherwise -> giveup $ "The uuid in the annex:: url does not match the uuid of the remote named " ++ remotename
|
||||||
-- When cloning from an annex:: url,
|
-- When cloning from an annex:: url,
|
||||||
-- this is used to set up the origin remote.
|
-- this is used to set up the origin remote.
|
||||||
Nothing -> (initremote remotename >>= a)
|
Nothing -> specialRemoteFromUrl sab
|
||||||
`finally` cleanupInitialization sab
|
(initremote remotename >>= a)
|
||||||
Nothing -> inittempremote
|
Nothing -> specialRemoteFromUrl sab inittempremote
|
||||||
`finally` cleanupInitialization sab
|
|
||||||
where
|
where
|
||||||
-- Initialize a new special remote with the provided configuration
|
-- Initialize a new special remote with the provided configuration
|
||||||
-- and name.
|
-- and name.
|
||||||
|
@ -869,27 +871,48 @@ getRepo = getEnv "GIT_WORK_TREE" >>= \case
|
||||||
|
|
||||||
-- Records what the git-annex branch was at the beginning of this command.
|
-- Records what the git-annex branch was at the beginning of this command.
|
||||||
data StartAnnexBranch
|
data StartAnnexBranch
|
||||||
= AnnexBranchExistedAlready Ref
|
= AnnexBranchExistedAlready Sha
|
||||||
| AnnexBranchCreatedEmpty Ref
|
| AnnexBranchCreatedEmpty Sha
|
||||||
|
|
||||||
|
{- Run early in the command, gets the initial state of the git-annex
|
||||||
|
- branch.
|
||||||
|
-
|
||||||
|
- If the branch does not exist yet, it's created here. This is done
|
||||||
|
- because it's hard to avoid the branch being created by this command,
|
||||||
|
- so tracking the sha of the created branch allows cleaning it up later.
|
||||||
|
-}
|
||||||
startAnnexBranch :: Annex StartAnnexBranch
|
startAnnexBranch :: Annex StartAnnexBranch
|
||||||
startAnnexBranch = ifM (null <$> Annex.Branch.siblingBranches)
|
startAnnexBranch = ifM (null <$> Annex.Branch.siblingBranches)
|
||||||
( AnnexBranchCreatedEmpty <$> Annex.Branch.getBranch
|
( AnnexBranchCreatedEmpty <$> Annex.Branch.getBranch
|
||||||
, AnnexBranchExistedAlready <$> Annex.Branch.getBranch
|
, AnnexBranchExistedAlready <$> Annex.Branch.getBranch
|
||||||
)
|
)
|
||||||
|
|
||||||
-- This is run after git has used this process to fetch or push from a
|
-- This runs an action that will set up a special remote that
|
||||||
-- special remote that was specified using a git-annex url. If the git
|
-- was specified using an annex url.
|
||||||
-- repository was not initialized for use by git-annex already, it is still
|
|
||||||
-- not initialized at this point.
|
|
||||||
--
|
--
|
||||||
|
-- Setting up a special remote needs to write its config to the git-annex
|
||||||
|
-- branch. And using a special remote may also write to the branch.
|
||||||
|
-- But in this case, writes to the git-annex branch need to be avoided,
|
||||||
|
-- so that cleanupInitialization can leave things in the right state.
|
||||||
|
--
|
||||||
|
-- So this prevents commits to the git-annex branch, and redirects all
|
||||||
|
-- journal writes to a temporary directory, so that all writes
|
||||||
|
-- to the git-annex branch by the action will be discarded.
|
||||||
|
specialRemoteFromUrl :: StartAnnexBranch -> Annex a -> Annex a
|
||||||
|
specialRemoteFromUrl sab a = withTmpDir "journal" $ \tmpdir -> do
|
||||||
|
Annex.overrideGitConfig $ \c ->
|
||||||
|
c { annexAlwaysCommit = False }
|
||||||
|
Annex.BranchState.changeState $ \st ->
|
||||||
|
st { alternateJournal = Just (toRawFilePath tmpdir) }
|
||||||
|
a `finally` cleanupInitialization sab
|
||||||
|
|
||||||
-- If the git-annex branch did not exist when this command started,
|
-- If the git-annex branch did not exist when this command started,
|
||||||
-- the current contents of it were created in passing by this command,
|
-- it was created empty by this command, and this command has avoided
|
||||||
-- which is hard to avoid. But if a git-annex branch is fetched from the
|
-- making any other commits to it. If nothing else has written to the
|
||||||
-- special remote and contains Differences, it would not be possible to
|
-- branch while this command was running, the branch will be deleted.
|
||||||
-- merge it into the git-annex branch that was created while running this
|
-- That allows for the git-annex branch that is fetched from the special
|
||||||
-- command. To avoid that problem, when the git-annex branch was created
|
-- remote to contain Differences, which would prevent it from being merged
|
||||||
-- at the start of this command, it's deleted.
|
-- with the git-annex branch created by this command.
|
||||||
--
|
--
|
||||||
-- If there is still not a sibling git-annex branch, this deletes all annex
|
-- If there is still not a sibling git-annex branch, this deletes all annex
|
||||||
-- objects for git bundles from the annex objects directory, and deletes
|
-- objects for git bundles from the annex objects directory, and deletes
|
||||||
|
@ -905,7 +928,8 @@ cleanupInitialization :: StartAnnexBranch -> Annex ()
|
||||||
cleanupInitialization sab = do
|
cleanupInitialization sab = do
|
||||||
case sab of
|
case sab of
|
||||||
AnnexBranchExistedAlready _ -> noop
|
AnnexBranchExistedAlready _ -> noop
|
||||||
AnnexBranchCreatedEmpty _ -> do
|
AnnexBranchCreatedEmpty r ->
|
||||||
|
whenM ((r ==) <$> Annex.Branch.getBranch) $ do
|
||||||
inRepo $ Git.Branch.delete Annex.Branch.fullname
|
inRepo $ Git.Branch.delete Annex.Branch.fullname
|
||||||
indexfile <- fromRepo gitAnnexIndex
|
indexfile <- fromRepo gitAnnexIndex
|
||||||
liftIO $ removeWhenExistsWith R.removeLink indexfile
|
liftIO $ removeWhenExistsWith R.removeLink indexfile
|
||||||
|
|
|
@ -36,7 +36,10 @@ data BranchState = BranchState
|
||||||
-- process need to be noticed while the current process is running?
|
-- process need to be noticed while the current process is running?
|
||||||
-- (This makes the journal always be read, and avoids using the
|
-- (This makes the journal always be read, and avoids using the
|
||||||
-- cache.)
|
-- cache.)
|
||||||
|
, alternateJournal :: Maybe RawFilePath
|
||||||
|
-- ^ use this directory for all journals, rather than the
|
||||||
|
-- gitAnnexJournalDir and gitAnnexPrivateJournalDir.
|
||||||
}
|
}
|
||||||
|
|
||||||
startBranchState :: BranchState
|
startBranchState :: BranchState
|
||||||
startBranchState = BranchState False False False [] [] [] False
|
startBranchState = BranchState False False False [] [] [] False Nothing
|
||||||
|
|
|
@ -10,26 +10,6 @@ will be available to users who don't use datalad.
|
||||||
|
|
||||||
This is implememented and working. Remaining todo list for it:
|
This is implememented and working. Remaining todo list for it:
|
||||||
|
|
||||||
* Cloning writes the new special remote config into remote.log,
|
|
||||||
and *deletes* other special remote configs.
|
|
||||||
|
|
||||||
The remote config from the url may be slightly different as well
|
|
||||||
than the existing one. Cloning should not write it.
|
|
||||||
|
|
||||||
* The race condition described in
|
|
||||||
[[!commit 797f27ab0517e0021363791ff269300f2ba095a5]]
|
|
||||||
where before git-annex init is run in a repo,
|
|
||||||
using git-remote-annex and at the same time git-annex init can lose
|
|
||||||
changes that the latter command (and ones after it) write to the
|
|
||||||
git-annex branch.
|
|
||||||
|
|
||||||
This should be fixable by making git-remote-annex not write to the
|
|
||||||
git-annex branch, but to eg, a temporary journal directory.
|
|
||||||
|
|
||||||
Also, when the remote uses importtree=yes, pushing to it updates
|
|
||||||
content identifiers, which currently get recorded in the git-annex
|
|
||||||
branch. It would be good to avoid that being written as well.
|
|
||||||
|
|
||||||
* Test incremental push edge cases involving checkprereq.
|
* Test incremental push edge cases involving checkprereq.
|
||||||
|
|
||||||
* Cloning from an annex:: url with importtree=yes doesn't work
|
* Cloning from an annex:: url with importtree=yes doesn't work
|
||||||
|
|
Loading…
Add table
Reference in a new issue