git-remote-annex: avoid bundle object leakage in push race or interrupted push
Locally record the manifest before uploading it or any bundles, and read it on the next push. Any bundles from the push that are not included in the currently being pushed manifest will get added to the outManifest, and so eventually get deleted. This deals with an interrupted push that is not resumed and instead something else is pushed. And it deals with a push race that overwrites the manifest. Of course, this can't help if one of those situations is followed by the local repo being deleted. But that's equivilant to doing a git-annex copy of a new annexed file to a special remote and then deleting the special repo w/o pushing. In either case the special remote ends up with a object in it that git-annex doesn't know about.
This commit is contained in:
parent
4a77c77d2e
commit
1a3c60cc8e
2 changed files with 62 additions and 32 deletions
|
@ -13,6 +13,7 @@ import Annex.Common
|
|||
import Types.GitRemoteAnnex
|
||||
import qualified Annex
|
||||
import qualified Remote
|
||||
import qualified Git
|
||||
import qualified Git.CurrentRepo
|
||||
import qualified Git.Ref
|
||||
import qualified Git.Branch
|
||||
|
@ -37,6 +38,7 @@ import Types.BranchState
|
|||
import Types.Difference
|
||||
import Types.Crypto
|
||||
import Git.Types
|
||||
import Logs.File
|
||||
import Logs.Difference
|
||||
import Annex.Init
|
||||
import Annex.UUID
|
||||
|
@ -267,10 +269,12 @@ fullPush st rmt refs = guardPush st $ do
|
|||
oldmanifest <- maybe (downloadManifestWhenPresent rmt) pure
|
||||
(manifestCache st)
|
||||
let bs = map Git.Bundle.fullBundleSpec refs
|
||||
bundlekey <- generateAndUploadGitBundle rmt bs oldmanifest
|
||||
(bundlekey, uploadbundle) <- generateGitBundle rmt bs oldmanifest
|
||||
let manifest = mkManifest [bundlekey]
|
||||
(inManifest oldmanifest ++ outManifest oldmanifest)
|
||||
uploadManifest rmt manifest
|
||||
manifest' <- startPush rmt manifest
|
||||
uploadbundle
|
||||
uploadManifest rmt manifest'
|
||||
return (True, st { manifestCache = Nothing })
|
||||
|
||||
guardPush :: State -> Annex (Bool, State) -> Annex (Bool, State)
|
||||
|
@ -287,8 +291,11 @@ incrementalPush :: State -> Remote -> M.Map Ref Sha -> M.Map Ref Sha -> Annex (B
|
|||
incrementalPush st rmt oldtrackingrefs newtrackingrefs = guardPush st $ do
|
||||
oldmanifest <- maybe (downloadManifestWhenPresent rmt) pure (manifestCache st)
|
||||
bs <- calc [] (M.toList newtrackingrefs)
|
||||
bundlekey <- generateAndUploadGitBundle rmt bs oldmanifest
|
||||
uploadManifest rmt (oldmanifest <> mkManifest [bundlekey] [])
|
||||
(bundlekey, uploadbundle) <- generateGitBundle rmt bs oldmanifest
|
||||
let manifest = oldmanifest <> mkManifest [bundlekey] []
|
||||
manifest' <- startPush rmt manifest
|
||||
uploadbundle
|
||||
uploadManifest rmt manifest'
|
||||
return (True, st { manifestCache = Nothing })
|
||||
where
|
||||
calc c [] = return (reverse c)
|
||||
|
@ -356,8 +363,10 @@ pushEmpty st rmt = guardPush st $ do
|
|||
(manifestCache st)
|
||||
let manifest = mkManifest mempty
|
||||
(inManifest oldmanifest ++ outManifest oldmanifest)
|
||||
manifest' <- dropOldKeys rmt manifest
|
||||
uploadManifest rmt manifest'
|
||||
(manifest', manifestwriter) <- startPush' rmt manifest
|
||||
manifest'' <- dropOldKeys rmt manifest'
|
||||
manifestwriter manifest''
|
||||
uploadManifest rmt manifest''
|
||||
return (True, st { manifestCache = Nothing })
|
||||
|
||||
data RefSpec = RefSpec
|
||||
|
@ -678,6 +687,38 @@ uploadManifest rmt manifest = do
|
|||
unlinkAnnex mk
|
||||
return ok
|
||||
|
||||
{- A manifest file is cached here before it or the bundles listed in it
|
||||
- is uploaded to the special remote.
|
||||
-
|
||||
- This prevents forgetting which bundles were uploaded when a push gets
|
||||
- interrupted before updating the manifest on the remote, or when a race
|
||||
- causes the uploaded manigest to be overwritten.
|
||||
-}
|
||||
lastPushedManifestFile :: UUID -> Git.Repo -> RawFilePath
|
||||
lastPushedManifestFile u r = gitAnnexDir r P.</> "git-remote-annex"
|
||||
P.</> fromUUID u P.</> "manifest"
|
||||
|
||||
{- Call before uploading anything. The returned manifest has added
|
||||
- to it any bundle keys that were in the lastPushedManifestFile
|
||||
- and that are not in the new manifest. -}
|
||||
startPush :: Remote -> Manifest -> Annex Manifest
|
||||
startPush rmt manifest = do
|
||||
(manifest', writer) <- startPush' rmt manifest
|
||||
writer manifest'
|
||||
return manifest'
|
||||
|
||||
startPush' :: Remote -> Manifest -> Annex (Manifest, Manifest -> Annex ())
|
||||
startPush' rmt manifest = do
|
||||
f <- fromRepo (lastPushedManifestFile (Remote.uuid rmt))
|
||||
oldmanifest <- liftIO $
|
||||
fromRight mempty . parseManifest
|
||||
<$> B.readFile (fromRawFilePath f)
|
||||
`catchNonAsync` (const (pure mempty))
|
||||
let manifest' = manifest <> mkManifest []
|
||||
(inManifest oldmanifest ++ outManifest oldmanifest)
|
||||
let writer = writeLogFile f . decodeBS . formatManifest
|
||||
return (manifest', writer)
|
||||
|
||||
-- Drops the outManifest keys. Returns a version of the manifest with
|
||||
-- any outManifest keys that were successfully dropped removed from it.
|
||||
--
|
||||
|
@ -767,30 +808,34 @@ uploadGitObject rmt k = getKeyExportLocations rmt k >>= \case
|
|||
_ -> noRetry
|
||||
|
||||
-- Generates a git bundle, ingests it into the local objects directory,
|
||||
-- and uploads its key to the special remote.
|
||||
-- and returns an action that uploads its key to the special remote.
|
||||
--
|
||||
-- If the key is already present in the provided manifest, avoids
|
||||
-- uploading it.
|
||||
-- ingesting or uploading it.
|
||||
--
|
||||
-- On failure, an exception is thrown, and nothing is added to the local
|
||||
-- objects directory.
|
||||
generateAndUploadGitBundle
|
||||
generateGitBundle
|
||||
:: Remote
|
||||
-> [Git.Bundle.BundleSpec]
|
||||
-> Manifest
|
||||
-> Annex Key
|
||||
generateAndUploadGitBundle rmt bs manifest =
|
||||
-> Annex (Key, Annex ())
|
||||
generateGitBundle rmt bs manifest =
|
||||
withTmpFile "GITBUNDLE" $ \tmp tmph -> do
|
||||
liftIO $ hClose tmph
|
||||
inRepo $ Git.Bundle.create tmp bs
|
||||
bundlekey <- genGitBundleKey (Remote.uuid rmt)
|
||||
(toRawFilePath tmp) nullMeterUpdate
|
||||
unless (bundlekey `elem` (inManifest manifest)) $ do
|
||||
unlessM (moveAnnex bundlekey (AssociatedFile Nothing) (toRawFilePath tmp)) $
|
||||
giveup "Unable to push"
|
||||
uploadGitObject rmt bundlekey
|
||||
`onException` unlinkAnnex bundlekey
|
||||
return bundlekey
|
||||
if (bundlekey `notElem` inManifest manifest)
|
||||
then do
|
||||
unlessM (moveAnnex bundlekey (AssociatedFile Nothing) (toRawFilePath tmp)) $
|
||||
giveup "Unable to push"
|
||||
return (bundlekey, uploadaction bundlekey)
|
||||
else return (bundlekey, noop)
|
||||
where
|
||||
uploadaction bundlekey =
|
||||
uploadGitObject rmt bundlekey
|
||||
`onException` unlinkAnnex bundlekey
|
||||
|
||||
dropKey :: Remote -> Key -> Annex Bool
|
||||
dropKey rmt k = tryNonAsync (dropKey' rmt k) >>= \case
|
||||
|
|
|
@ -36,18 +36,3 @@ This is implememented and working. Remaining todo list for it:
|
|||
special remote.
|
||||
`datalad-annex::https://example.com?type=web&url={noquery}`
|
||||
Supporting something like this would be good.
|
||||
|
||||
* A push race can overwrite a manifest that had a GITBUNDLE added to it,
|
||||
and so lose track of a GITBUNDLE. That will never get deleted.
|
||||
|
||||
Could git-annex detect this after the fact and clean it up? Eg,
|
||||
if it caches the last manifest it uploaded, the next time it downloads
|
||||
the manifest it can check if there are bundle files in the cached
|
||||
manifest that are not in the new one, that still exist in the remote.
|
||||
If so, it can add those to the outManifest, to get dropped later.
|
||||
|
||||
This wouldn't fix the case where a push race happens and then the repo
|
||||
whose manifest was overwritten gets deleted. But this is similar to
|
||||
adding a file to the annex, copying it to the special remote, and then
|
||||
deleting the repo w/o sending the git-annex branch anywhere, which leaves
|
||||
a dangling object too. So don't worry about that.
|
||||
|
|
Loading…
Reference in a new issue