diff --git a/Backend/GitRemoteAnnex.hs b/Backend/GitRemoteAnnex.hs index d550a0ddff..13c6b2820f 100644 --- a/Backend/GitRemoteAnnex.hs +++ b/Backend/GitRemoteAnnex.hs @@ -84,9 +84,10 @@ genGitBundleKey remoteuuid file meterupdate = do , keySize = Just filesize } -genManifestKey :: UUID -> Key -genManifestKey u = mkKey $ \kd -> kd - { keyName = S.toShort (fromUUID u) +genManifestKey :: UUID -> Maybe S.ShortByteString -> Key +genManifestKey u extension = mkKey $ \kd -> kd + { keyName = S.toShort (fromUUID u) <> + maybe mempty ("." <>) extension , keyVariety = GitManifestKey } @@ -99,7 +100,14 @@ isGitRemoteAnnexKey u k = -- Remove the checksum that comes after the UUID. let b' = B8.dropWhileEnd (/= '-') b in B8.take (B8.length b' - 1) b' - GitManifestKey -> sameuuid id + GitManifestKey -> sameuuid $ \b -> + -- Remove an optional extension after the UUID. + -- (A UUID never contains '.') + if '.' `B8.elem` b + then + let b' = B8.dropWhileEnd (/= '.') b + in B8.take (B8.length b' - 1) b' + else b _ -> False where sameuuid f = fromUUID u == f (S.fromShort (fromKey keyName k)) diff --git a/CmdLine/GitRemoteAnnex.hs b/CmdLine/GitRemoteAnnex.hs index 281d943510..4ec8b9dc03 100644 --- a/CmdLine/GitRemoteAnnex.hs +++ b/CmdLine/GitRemoteAnnex.hs @@ -585,17 +585,20 @@ downloadManifestOrFail rmt = -- Throws errors if the remote cannot be accessed or the download fails, -- or if the manifest file cannot be parsed. downloadManifest :: Remote -> Annex (Maybe Manifest) -downloadManifest rmt = getKeyExportLocations rmt mk >>= \case - Nothing -> ifM (Remote.checkPresent rmt mk) - ( gettotmp $ \tmp -> - Remote.retrieveKeyFile rmt mk - (AssociatedFile Nothing) tmp - nullMeterUpdate Remote.NoVerify - , return Nothing - ) - Just locs -> getexport locs +downloadManifest rmt = get mkmain >>= maybe (get mkbak) (pure . Just) where - mk = genManifestKey (Remote.uuid rmt) + mkmain = genManifestKey (Remote.uuid rmt) Nothing + mkbak = genManifestKey (Remote.uuid rmt) (Just "bak") + + get mk = getKeyExportLocations rmt mk >>= \case + Nothing -> ifM (Remote.checkPresent rmt mk) + ( gettotmp $ \tmp -> + Remote.retrieveKeyFile rmt mk + (AssociatedFile Nothing) tmp + nullMeterUpdate Remote.NoVerify + , return Nothing + ) + Just locs -> getexport mk locs -- Downloads to a temporary file, rather than using eg -- Annex.Transfer.download that would put it in the object @@ -610,13 +613,13 @@ downloadManifest rmt = getKeyExportLocations rmt mk >>= \case Right m -> return (Just m) Left err -> giveup err - getexport [] = return Nothing - getexport (loc:locs) = + getexport _ [] = return Nothing + getexport mk (loc:locs) = ifM (Remote.checkPresentExport (Remote.exportActions rmt) mk loc) ( gettotmp $ \tmp -> Remote.retrieveExport (Remote.exportActions rmt) mk loc tmp nullMeterUpdate - , getexport locs + , getexport mk locs ) -- Uploads the Manifest to the remote. @@ -628,24 +631,43 @@ downloadManifest rmt = getKeyExportLocations rmt mk >>= \case -- and behavior of remotes is undefined when sending a key that is -- already present on the remote, but with different content. -- --- Note that if this is interrupted or loses access to the remote part --- way through, it may leave the remote without a manifest file. That will --- appear as if all refs have been deleted from the remote. --- XXX It should be possible to remember when that happened, by writing --- state to a file before, and then the next time git-remote-annex is run, it --- could recover from the situation. +-- So this may be interrupted and leave the manifest key not present. +-- To deal with that, there is a backup manifest key. This takes care +-- to ensure that one of the two keys will always exist. -- -- Once the manifest has been uploaded, attempts to drop all outManifest -- keys. A failure to drop does not cause an error to be thrown, because --- the push has already succeeded. +-- the push has already succeeded. Avoids re-uploading the manifest with +-- the dropped keys removed from outManifest, because dropping the keys +-- takes some time and another push may have already overwritten +-- the manifest in the meantime. uploadManifest :: Remote -> Manifest -> Annex () -uploadManifest rmt manifest = - withTmpFile "GITMANIFEST" $ \tmp tmph -> do - liftIO $ forM_ (inManifest manifest) $ \bundlekey -> - B8.hPutStrLn tmph (serializeKey' bundlekey) - liftIO $ hClose tmph - -- Remove old manifest if present. +uploadManifest rmt manifest = do + ok <- ifM (Remote.checkPresent rmt mkbak) + ( dropandput mkmain <&&> dropandput mkbak + -- The backup manifest doesn't exist, so upload + -- it first, and then the manifest second. + -- This ensures that at no point are both deleted. + , put mkbak <&&> dropandput mkmain + ) + if ok + then void $ dropOldKeys rmt manifest (const True) + else uploadfailed + where + mkmain = genManifestKey (Remote.uuid rmt) Nothing + mkbak = genManifestKey (Remote.uuid rmt) (Just "bak") + + uploadfailed = giveup "Failed to upload manifest." + + manifestcontent = B8.unlines $ map serializeKey' (inManifest manifest) + + dropandput mk = do dropKey' rmt mk + put mk + + put mk = withTmpFile "GITMANIFEST" $ \tmp tmph -> do + liftIO $ B8.hPut tmph manifestcontent + liftIO $ hClose tmph -- storeKey needs the key to be in the annex objects -- directory, so put the manifest file there temporarily. -- Using linkOrCopy rather than moveAnnex to avoid updating @@ -662,17 +684,7 @@ uploadManifest rmt manifest = -- Don't leave the manifest key in the annex objects -- directory. unlinkAnnex mk - if ok - -- Avoid re-uploading the manifest with - -- the dropped keys removed from outManifest, - -- because dropping the keys takes some time and - -- another push may have already overwritten the - -- manifest in the meantime. - then void $ dropOldKeys rmt manifest (const True) - else uploadfailed - where - mk = genManifestKey (Remote.uuid rmt) - uploadfailed = giveup $ "Failed to upload " ++ serializeKey mk + return ok -- Drops the outManifest keys. Returns a version of the manifest with -- any outManifest keys that were successfully dropped removed from it. diff --git a/doc/internals/git-remote-annex.mdwn b/doc/internals/git-remote-annex.mdwn index 8fff1eff4e..d390c07341 100644 --- a/doc/internals/git-remote-annex.mdwn +++ b/doc/internals/git-remote-annex.mdwn @@ -4,9 +4,10 @@ repository to a special remote, and later cloning from it. This adds two new key types to git-annex, GITMANIFEST and a GITBUNDLE. GITMANIFEST--$UUID is the manifest for a git repository stored in the -git-annex repository with that UUID. +git-annex repository with that UUID. When that is not present, +GITMANIFEST--$UUID.bak is a backup copy that can be used instead. -GITBUNDLE--$UUID-sha256 is a git bundle. +GITBUNDLE--$UUID-$sha256 is a git bundle. # format of the manifest file @@ -23,11 +24,10 @@ and are in the process of being deleted. In an exporttree=yes remote, the GITMANIFEST and GITBUNDLE objects are stored in the remote, under the `.git/annex/objects/` path. -# multiple GITMANIFEST files +# multiple special remotes in the same place -Usually there will only be one per special remote, but it's possible for -multiple special remotes to point to the same object storage, and if so -multiple GITMANIFEST objects can be stored. +It's possible for multiple special remotes to point to the same +object storage. This is why the UUID of the special remote is included in the GITMANIFEST key, and in the annex:: uri. diff --git a/doc/todo/git-remote-annex.mdwn b/doc/todo/git-remote-annex.mdwn index b5d7d5d37e..81ee7ac992 100644 --- a/doc/todo/git-remote-annex.mdwn +++ b/doc/todo/git-remote-annex.mdwn @@ -47,26 +47,6 @@ This is implememented and working. Remaining todo list for it: (with or without exporttree=yes). This is because the ContentIdentifier db is not populated. It should be possible to work around this. -* See XXX in uploadManifest about recovering from a situation - where the remote is left with a deleted manifest when a push - is interrupted part way through. - - This should be recoverable - by caching the manifest locally and re-uploading it when - the remote has no manifest or prompting the user to merge and re-push. - But, this leaves the remote unusable for fetching until that is dealt - with. - - Or, could have two identical manifest files, A and B. When pushing, first - delete and upload A. Then delete and upload B. When fetching, if A does - not exist, use B instead. However, allows for races and interruptions - that cause A and B to be out of sync, with one push in A and another in B. - - Once out of sync, in the window where a push has deleted but not - re-uploaded A yet, B will have a different content. So a fetch at that - point will see something that was pushed by a push that otherwise had - lost a push race. - * It would be nice if git-annex could generate an annex:: url for a special remote and show it to the user, eg when they have set the shorthand "annex::" url, so they know the full url.