clean up git-remote-annex git-annex branch handling
Implemented alternateJournal, which git-remote-annex
uses to avoid any writes to the git-annex branch while setting up
a special remote from an annex:: url.
That prevents the remote.log from being overwritten with the special
remote configuration from the url, which might not be 100% the same as
the existing special remote configuration.
And it prevents an overwrite deleting of other stuff that was
already in the remote.log.
Also, when the branch was created by git-remote-annex, only delete it
at the end if nothing else has been written to it by another command.
This fixes the race condition described in
797f27ab05
, where git-remote-annex
set up the branch and git-annex init and other commands were
run at the same time and their writes to the branch were lost.
This commit is contained in:
parent
d24d8870c5
commit
adcebbae47
6 changed files with 84 additions and 66 deletions
|
@ -727,7 +727,8 @@ stageJournal :: JournalLocked -> Annex () -> Annex ()
|
|||
stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do
|
||||
prepareModifyIndex jl
|
||||
g <- gitRepo
|
||||
let dir = gitAnnexJournalDir g
|
||||
st <- getState
|
||||
let dir = gitAnnexJournalDir st g
|
||||
(jlogf, jlogh) <- openjlog (fromRawFilePath tmpdir)
|
||||
withHashObjectHandle $ \h ->
|
||||
withJournalHandle gitAnnexJournalDir $ \jh ->
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
- All files in the journal must be a series of lines separated by
|
||||
- newlines.
|
||||
-
|
||||
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -23,6 +23,8 @@ import qualified Git
|
|||
import Annex.Perms
|
||||
import Annex.Tmp
|
||||
import Annex.LockFile
|
||||
import Annex.BranchState
|
||||
import Types.BranchState
|
||||
import Utility.Directory.Stream
|
||||
import qualified Utility.RawFilePath as R
|
||||
|
||||
|
@ -82,9 +84,10 @@ privateUUIDsKnown' = not . S.null . annexPrivateRepos . Annex.gitconfig
|
|||
-}
|
||||
setJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex ()
|
||||
setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do
|
||||
st <- getState
|
||||
jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
|
||||
( return gitAnnexPrivateJournalDir
|
||||
, return gitAnnexJournalDir
|
||||
( return (gitAnnexPrivateJournalDir st)
|
||||
, return (gitAnnexJournalDir st)
|
||||
)
|
||||
-- journal file is written atomically
|
||||
let jfile = journalFile file
|
||||
|
@ -106,9 +109,10 @@ newtype AppendableJournalFile = AppendableJournalFile (RawFilePath, RawFilePath)
|
|||
- branch. -}
|
||||
checkCanAppendJournalFile :: JournalLocked -> RegardingUUID -> RawFilePath -> Annex (Maybe AppendableJournalFile)
|
||||
checkCanAppendJournalFile _jl ru file = do
|
||||
st <- getState
|
||||
jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
|
||||
( return gitAnnexPrivateJournalDir
|
||||
, return gitAnnexJournalDir
|
||||
( return (gitAnnexPrivateJournalDir st)
|
||||
, return (gitAnnexJournalDir st)
|
||||
)
|
||||
let jfile = jd P.</> journalFile file
|
||||
ifM (liftIO $ R.doesPathExist jfile)
|
||||
|
@ -176,14 +180,12 @@ data GetPrivate = GetPrivate Bool
|
|||
-}
|
||||
getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent
|
||||
getJournalFileStale (GetPrivate getprivate) file = do
|
||||
-- Optimisation to avoid a second MVar access.
|
||||
st <- Annex.getState id
|
||||
let g = Annex.repo st
|
||||
liftIO $
|
||||
if getprivate && privateUUIDsKnown' st
|
||||
then do
|
||||
x <- getfrom (gitAnnexJournalDir g)
|
||||
getfrom (gitAnnexPrivateJournalDir g) >>= \case
|
||||
x <- getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st))
|
||||
getfrom (gitAnnexPrivateJournalDir (Annex.branchstate st) (Annex.repo st)) >>= \case
|
||||
Nothing -> return $ case x of
|
||||
Nothing -> NoJournalledContent
|
||||
Just b -> JournalledContent b
|
||||
|
@ -193,7 +195,7 @@ getJournalFileStale (GetPrivate getprivate) file = do
|
|||
-- happens in a merge of two
|
||||
-- git-annex branches.
|
||||
Just x' -> x' <> y
|
||||
else getfrom (gitAnnexJournalDir g) >>= return . \case
|
||||
else getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st)) >>= return . \case
|
||||
Nothing -> NoJournalledContent
|
||||
Just b -> JournalledContent b
|
||||
where
|
||||
|
@ -219,18 +221,20 @@ discardIncompleteAppend v
|
|||
{- List of existing journal files in a journal directory, but without locking,
|
||||
- may miss new ones just being added, or may have false positives if the
|
||||
- journal is staged as it is run. -}
|
||||
getJournalledFilesStale :: (Git.Repo -> RawFilePath) -> Annex [RawFilePath]
|
||||
getJournalledFilesStale :: (BranchState -> Git.Repo -> RawFilePath) -> Annex [RawFilePath]
|
||||
getJournalledFilesStale getjournaldir = do
|
||||
g <- gitRepo
|
||||
fs <- liftIO $ catchDefaultIO [] $
|
||||
getDirectoryContents $ fromRawFilePath (getjournaldir g)
|
||||
st <- Annex.getState id
|
||||
let d = getjournaldir (Annex.branchstate st) (Annex.repo st)
|
||||
fs <- liftIO $ catchDefaultIO [] $
|
||||
getDirectoryContents (fromRawFilePath d)
|
||||
return $ filter (`notElem` [".", ".."]) $
|
||||
map (fileJournal . toRawFilePath) fs
|
||||
|
||||
{- Directory handle open on a journal directory. -}
|
||||
withJournalHandle :: (Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a
|
||||
withJournalHandle :: (BranchState -> Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a
|
||||
withJournalHandle getjournaldir a = do
|
||||
d <- fromRepo getjournaldir
|
||||
st <- Annex.getState id
|
||||
let d = getjournaldir (Annex.branchstate st) (Annex.repo st)
|
||||
bracket (opendir d) (liftIO . closeDirectory) (liftIO . a)
|
||||
where
|
||||
-- avoid overhead of creating the journal directory when it already
|
||||
|
@ -239,9 +243,10 @@ withJournalHandle getjournaldir a = do
|
|||
`catchIO` (const (createAnnexDirectory d >> opendir d))
|
||||
|
||||
{- Checks if there are changes in the journal. -}
|
||||
journalDirty :: (Git.Repo -> RawFilePath) -> Annex Bool
|
||||
journalDirty :: (BranchState -> Git.Repo -> RawFilePath) -> Annex Bool
|
||||
journalDirty getjournaldir = do
|
||||
d <- fromRawFilePath <$> fromRepo getjournaldir
|
||||
st <- getState
|
||||
d <- fromRawFilePath <$> fromRepo (getjournaldir st)
|
||||
liftIO $
|
||||
(not <$> isDirectoryEmpty d)
|
||||
`catchIO` (const $ doesDirectoryExist d)
|
||||
|
|
|
@ -118,6 +118,7 @@ import Key
|
|||
import Types.UUID
|
||||
import Types.GitConfig
|
||||
import Types.Difference
|
||||
import Types.BranchState
|
||||
import qualified Git
|
||||
import qualified Git.Types as Git
|
||||
import Git.FilePath
|
||||
|
@ -528,15 +529,19 @@ gitAnnexTransferDir r =
|
|||
|
||||
{- .git/annex/journal/ is used to journal changes made to the git-annex
|
||||
- branch -}
|
||||
gitAnnexJournalDir :: Git.Repo -> RawFilePath
|
||||
gitAnnexJournalDir r =
|
||||
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "journal"
|
||||
gitAnnexJournalDir :: BranchState -> Git.Repo -> RawFilePath
|
||||
gitAnnexJournalDir st r = P.addTrailingPathSeparator $
|
||||
case alternateJournal st of
|
||||
Nothing -> gitAnnexDir r P.</> "journal"
|
||||
Just d -> d
|
||||
|
||||
{- .git/annex/journal.private/ is used to journal changes regarding private
|
||||
- repositories. -}
|
||||
gitAnnexPrivateJournalDir :: Git.Repo -> RawFilePath
|
||||
gitAnnexPrivateJournalDir r =
|
||||
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "journal-private"
|
||||
gitAnnexPrivateJournalDir :: BranchState -> Git.Repo -> RawFilePath
|
||||
gitAnnexPrivateJournalDir st r = P.addTrailingPathSeparator $
|
||||
case alternateJournal st of
|
||||
Nothing -> gitAnnexDir r P.</> "journal-private"
|
||||
Just d -> d
|
||||
|
||||
{- Lock file for the journal. -}
|
||||
gitAnnexJournalLock :: Git.Repo -> RawFilePath
|
||||
|
|
|
@ -21,6 +21,7 @@ import qualified Git.Remote
|
|||
import qualified Git.Remote.Remove
|
||||
import qualified Annex.SpecialRemote as SpecialRemote
|
||||
import qualified Annex.Branch
|
||||
import qualified Annex.BranchState
|
||||
import qualified Types.Remote as Remote
|
||||
import qualified Logs.Remote
|
||||
import Remote.Helper.Encryptable (parseEncryptionMethod)
|
||||
|
@ -32,6 +33,7 @@ import Types.RemoteConfig
|
|||
import Types.ProposedAccepted
|
||||
import Types.Export
|
||||
import Types.GitConfig
|
||||
import Types.BranchState
|
||||
import Types.Difference
|
||||
import Types.Crypto
|
||||
import Git.Types
|
||||
|
@ -44,6 +46,7 @@ import Annex.SpecialRemote.Config
|
|||
import Remote.List
|
||||
import Remote.List.Util
|
||||
import Utility.Tmp
|
||||
import Utility.Tmp.Dir
|
||||
import Utility.Env
|
||||
import Utility.Metered
|
||||
|
||||
|
@ -485,10 +488,9 @@ withSpecialRemote cfg@(SpecialRemoteConfig {}) sab a = case specialRemoteName cf
|
|||
| otherwise -> giveup $ "The uuid in the annex:: url does not match the uuid of the remote named " ++ remotename
|
||||
-- When cloning from an annex:: url,
|
||||
-- this is used to set up the origin remote.
|
||||
Nothing -> (initremote remotename >>= a)
|
||||
`finally` cleanupInitialization sab
|
||||
Nothing -> inittempremote
|
||||
`finally` cleanupInitialization sab
|
||||
Nothing -> specialRemoteFromUrl sab
|
||||
(initremote remotename >>= a)
|
||||
Nothing -> specialRemoteFromUrl sab inittempremote
|
||||
where
|
||||
-- Initialize a new special remote with the provided configuration
|
||||
-- and name.
|
||||
|
@ -869,27 +871,48 @@ getRepo = getEnv "GIT_WORK_TREE" >>= \case
|
|||
|
||||
-- Records what the git-annex branch was at the beginning of this command.
|
||||
data StartAnnexBranch
|
||||
= AnnexBranchExistedAlready Ref
|
||||
| AnnexBranchCreatedEmpty Ref
|
||||
= AnnexBranchExistedAlready Sha
|
||||
| AnnexBranchCreatedEmpty Sha
|
||||
|
||||
{- Run early in the command, gets the initial state of the git-annex
|
||||
- branch.
|
||||
-
|
||||
- If the branch does not exist yet, it's created here. This is done
|
||||
- because it's hard to avoid the branch being created by this command,
|
||||
- so tracking the sha of the created branch allows cleaning it up later.
|
||||
-}
|
||||
startAnnexBranch :: Annex StartAnnexBranch
|
||||
startAnnexBranch = ifM (null <$> Annex.Branch.siblingBranches)
|
||||
( AnnexBranchCreatedEmpty <$> Annex.Branch.getBranch
|
||||
, AnnexBranchExistedAlready <$> Annex.Branch.getBranch
|
||||
)
|
||||
|
||||
-- This is run after git has used this process to fetch or push from a
|
||||
-- special remote that was specified using a git-annex url. If the git
|
||||
-- repository was not initialized for use by git-annex already, it is still
|
||||
-- not initialized at this point.
|
||||
-- This runs an action that will set up a special remote that
|
||||
-- was specified using an annex url.
|
||||
--
|
||||
-- Setting up a special remote needs to write its config to the git-annex
|
||||
-- branch. And using a special remote may also write to the branch.
|
||||
-- But in this case, writes to the git-annex branch need to be avoided,
|
||||
-- so that cleanupInitialization can leave things in the right state.
|
||||
--
|
||||
-- So this prevents commits to the git-annex branch, and redirects all
|
||||
-- journal writes to a temporary directory, so that all writes
|
||||
-- to the git-annex branch by the action will be discarded.
|
||||
specialRemoteFromUrl :: StartAnnexBranch -> Annex a -> Annex a
|
||||
specialRemoteFromUrl sab a = withTmpDir "journal" $ \tmpdir -> do
|
||||
Annex.overrideGitConfig $ \c ->
|
||||
c { annexAlwaysCommit = False }
|
||||
Annex.BranchState.changeState $ \st ->
|
||||
st { alternateJournal = Just (toRawFilePath tmpdir) }
|
||||
a `finally` cleanupInitialization sab
|
||||
|
||||
-- If the git-annex branch did not exist when this command started,
|
||||
-- the current contents of it were created in passing by this command,
|
||||
-- which is hard to avoid. But if a git-annex branch is fetched from the
|
||||
-- special remote and contains Differences, it would not be possible to
|
||||
-- merge it into the git-annex branch that was created while running this
|
||||
-- command. To avoid that problem, when the git-annex branch was created
|
||||
-- at the start of this command, it's deleted.
|
||||
-- it was created empty by this command, and this command has avoided
|
||||
-- making any other commits to it. If nothing else has written to the
|
||||
-- branch while this command was running, the branch will be deleted.
|
||||
-- That allows for the git-annex branch that is fetched from the special
|
||||
-- remote to contain Differences, which would prevent it from being merged
|
||||
-- with the git-annex branch created by this command.
|
||||
--
|
||||
-- If there is still not a sibling git-annex branch, this deletes all annex
|
||||
-- objects for git bundles from the annex objects directory, and deletes
|
||||
|
@ -905,10 +928,11 @@ cleanupInitialization :: StartAnnexBranch -> Annex ()
|
|||
cleanupInitialization sab = do
|
||||
case sab of
|
||||
AnnexBranchExistedAlready _ -> noop
|
||||
AnnexBranchCreatedEmpty _ -> do
|
||||
inRepo $ Git.Branch.delete Annex.Branch.fullname
|
||||
indexfile <- fromRepo gitAnnexIndex
|
||||
liftIO $ removeWhenExistsWith R.removeLink indexfile
|
||||
AnnexBranchCreatedEmpty r ->
|
||||
whenM ((r ==) <$> Annex.Branch.getBranch) $ do
|
||||
inRepo $ Git.Branch.delete Annex.Branch.fullname
|
||||
indexfile <- fromRepo gitAnnexIndex
|
||||
liftIO $ removeWhenExistsWith R.removeLink indexfile
|
||||
ifM Annex.Branch.hasSibling
|
||||
( do
|
||||
autoInitialize' (pure True) remoteList
|
||||
|
|
|
@ -36,7 +36,10 @@ data BranchState = BranchState
|
|||
-- process need to be noticed while the current process is running?
|
||||
-- (This makes the journal always be read, and avoids using the
|
||||
-- cache.)
|
||||
, alternateJournal :: Maybe RawFilePath
|
||||
-- ^ use this directory for all journals, rather than the
|
||||
-- gitAnnexJournalDir and gitAnnexPrivateJournalDir.
|
||||
}
|
||||
|
||||
startBranchState :: BranchState
|
||||
startBranchState = BranchState False False False [] [] [] False
|
||||
startBranchState = BranchState False False False [] [] [] False Nothing
|
||||
|
|
|
@ -10,26 +10,6 @@ will be available to users who don't use datalad.
|
|||
|
||||
This is implememented and working. Remaining todo list for it:
|
||||
|
||||
* Cloning writes the new special remote config into remote.log,
|
||||
and *deletes* other special remote configs.
|
||||
|
||||
The remote config from the url may be slightly different as well
|
||||
than the existing one. Cloning should not write it.
|
||||
|
||||
* The race condition described in
|
||||
[[!commit 797f27ab0517e0021363791ff269300f2ba095a5]]
|
||||
where before git-annex init is run in a repo,
|
||||
using git-remote-annex and at the same time git-annex init can lose
|
||||
changes that the latter command (and ones after it) write to the
|
||||
git-annex branch.
|
||||
|
||||
This should be fixable by making git-remote-annex not write to the
|
||||
git-annex branch, but to eg, a temporary journal directory.
|
||||
|
||||
Also, when the remote uses importtree=yes, pushing to it updates
|
||||
content identifiers, which currently get recorded in the git-annex
|
||||
branch. It would be good to avoid that being written as well.
|
||||
|
||||
* Test incremental push edge cases involving checkprereq.
|
||||
|
||||
* Cloning from an annex:: url with importtree=yes doesn't work
|
||||
|
|
Loading…
Add table
Reference in a new issue