git-remote-annex support exporttree=yes remotes

Put the annex objects in .git/annex/objects/ inside the export remote.
This way, when importing from the remote, they will be filtered out.

Note that, when importtree=yes, content identifiers are used, and this
means that pushing to a remote updates the git-annex branch. Urk.
Will need to try to prevent that later, but I already had a todo about
that for other reasons.

Untested!

Sponsored-By: Brock Spratlen on Patreon
This commit is contained in:
Joey Hess 2024-05-13 11:37:47 -04:00
parent 3f848564ac
commit 34eae54ff9
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 151 additions and 47 deletions

View file

@ -25,15 +25,19 @@ import qualified Types.Remote as Remote
import Annex.Transfer import Annex.Transfer
import Backend.GitRemoteAnnex import Backend.GitRemoteAnnex
import Config import Config
import Types.Key
import Types.RemoteConfig import Types.RemoteConfig
import Types.ProposedAccepted import Types.ProposedAccepted
import Types.Key import Types.Export
import Types.GitConfig import Types.GitConfig
import Types.Difference
import Git.Types import Git.Types
import Logs.Difference import Logs.Difference
import Annex.Init import Annex.Init
import Annex.UUID
import Annex.Content import Annex.Content
import Annex.Perms import Annex.Perms
import Annex.SpecialRemote.Config
import Remote.List import Remote.List
import Remote.List.Util import Remote.List.Util
import Utility.Tmp import Utility.Tmp
@ -45,8 +49,9 @@ import Data.Either
import qualified Data.ByteString as B import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8 import qualified Data.ByteString.Char8 as B8
import qualified Data.Map.Strict as M import qualified Data.Map.Strict as M
import System.FilePath.ByteString as P import qualified System.FilePath.ByteString as P
import qualified Utility.RawFilePath as R import qualified Utility.RawFilePath as R
import qualified Data.Set as S
run :: [String] -> IO () run :: [String] -> IO ()
run (remotename:url:[]) = run (remotename:url:[]) =
@ -526,6 +531,28 @@ getEnabledSpecialRemoteByName remotename =
maybe (return (Just rmt)) giveup maybe (return (Just rmt)) giveup
(checkSpecialRemoteProblems rmt) (checkSpecialRemoteProblems rmt)
parseManifest :: B.ByteString -> Either String Manifest
parseManifest b =
let (outks, inks) = partitionEithers $ map parseline $ B8.lines b
in case (checkvalid [] inks, checkvalid [] outks) of
(Right inks', Right outks') ->
Right $ mkManifest inks' outks'
(Left err, _) -> Left err
(_, Left err) -> Left err
where
parseline l
| "-" `B.isPrefixOf` l =
Left $ deserializeKey' $ B.drop 1 l
| otherwise =
Right $ deserializeKey' l
checkvalid c [] = Right (reverse c)
checkvalid c (Just k:ks) = case fromKey keyVariety k of
GitBundleKey -> checkvalid (k:c) ks
_ -> Left $ "Wrong type of key in manifest " ++ serializeKey k
checkvalid _ (Nothing:_) =
Left "Error parsing manifest"
-- Avoid using special remotes that are thirdparty populated, because -- Avoid using special remotes that are thirdparty populated, because
-- there is no way to push the git repository keys into one. -- there is no way to push the git repository keys into one.
-- --
@ -555,38 +582,39 @@ downloadManifestOrFail rmt =
-- Throws errors if the remote cannot be accessed or the download fails, -- Throws errors if the remote cannot be accessed or the download fails,
-- or if the manifest file cannot be parsed. -- or if the manifest file cannot be parsed.
downloadManifest :: Remote -> Annex (Maybe Manifest) downloadManifest :: Remote -> Annex (Maybe Manifest)
downloadManifest rmt = ifM (Remote.checkPresent rmt mk) downloadManifest rmt = getKeyExportLocations rmt mk >>= \case
-- Downloads to a temporary file, rather than using Nothing -> ifM (Remote.checkPresent rmt mk)
-- the usual Annex.Transfer.download. The content of manifests is ( gettotmp $ \tmp ->
-- not stable, and so it needs to re-download it fresh every time. Remote.retrieveKeyFile rmt mk
( withTmpFile "GITMANIFEST" $ \tmp tmph -> do (AssociatedFile Nothing) tmp
liftIO $ hClose tmph nullMeterUpdate Remote.NoVerify
_ <- Remote.retrieveKeyFile rmt mk , return Nothing
(AssociatedFile Nothing) tmp )
nullMeterUpdate Remote.NoVerify Just locs -> getexport locs
(outks, inks) <- partitionEithers . map parseline . B8.lines
<$> liftIO (B.readFile tmp)
m <- mkManifest
<$> checkvalid [] inks
<*> checkvalid [] outks
return (Just m)
, return Nothing
)
where where
mk = genManifestKey (Remote.uuid rmt) mk = genManifestKey (Remote.uuid rmt)
checkvalid c [] = return (reverse c) -- Downloads to a temporary file, rather than using eg
checkvalid c (Just k:ks) = case fromKey keyVariety k of -- Annex.Transfer.download that would put it in the object
GitBundleKey -> checkvalid (k:c) ks -- directory. The content of manifests is not stable, and so
_ -> giveup $ "Wrong type of key in manifest " ++ serializeKey k -- it needs to re-download it fresh every time, and the object
checkvalid _ (Nothing:_) = -- file should not be stored locally.
giveup $ "Error parsing manifest " ++ serializeKey mk gettotmp dl = withTmpFile "GITMANIFEST" $ \tmp tmph -> do
liftIO $ hClose tmph
_ <- dl tmp
b <- liftIO (B.readFile tmp)
case parseManifest b of
Right m -> return (Just m)
Left err -> giveup err
parseline l getexport [] = return Nothing
| "-" `B.isPrefixOf` l = getexport (loc:locs) =
Left $ deserializeKey' $ B.drop 1 l ifM (Remote.checkPresentExport (Remote.exportActions rmt) mk loc)
| otherwise = ( gettotmp $ \tmp ->
Right $ deserializeKey' l Remote.retrieveExport (Remote.exportActions rmt)
mk loc tmp nullMeterUpdate
, getexport locs
)
-- Uploads the Manifest to the remote. -- Uploads the Manifest to the remote.
-- --
@ -610,7 +638,7 @@ uploadManifest rmt manifest =
B8.hPutStrLn tmph (serializeKey' bundlekey) B8.hPutStrLn tmph (serializeKey' bundlekey)
liftIO $ hClose tmph liftIO $ hClose tmph
-- Remove old manifest if present. -- Remove old manifest if present.
Remote.removeKey rmt mk dropKey' rmt mk
-- storeKey needs the key to be in the annex objects -- storeKey needs the key to be in the annex objects
-- directory, so put the manifest file there temporarily. -- directory, so put the manifest file there temporarily.
-- Using linkOrCopy rather than moveAnnex to avoid updating -- Using linkOrCopy rather than moveAnnex to avoid updating
@ -622,9 +650,8 @@ uploadManifest rmt manifest =
linkOrCopy mk (toRawFilePath tmp) objfile Nothing linkOrCopy mk (toRawFilePath tmp) objfile Nothing
unless (isJust res) unless (isJust res)
uploadfailed uploadfailed
-- noRetry because manifest content is not stable ok <- (uploadGitObject rmt mk >> pure True)
ok <- upload rmt mk (AssociatedFile Nothing) `catchNonAsync` (const (pure False))
noRetry noNotification
-- Don't leave the manifest key in the annex objects -- Don't leave the manifest key in the annex objects
-- directory. -- directory.
unlinkAnnex mk unlinkAnnex mk
@ -650,22 +677,46 @@ uploadManifest rmt manifest =
-- 3. Git bundle objects are not usually transferred between repositories -- 3. Git bundle objects are not usually transferred between repositories
-- except special remotes (although the user can if they want to). -- except special remotes (although the user can if they want to).
downloadGitBundle :: Remote -> Key -> Annex FilePath downloadGitBundle :: Remote -> Key -> Annex FilePath
downloadGitBundle rmt k = downloadGitBundle rmt k = getKeyExportLocations rmt k >>= \case
ifM (download rmt k (AssociatedFile Nothing) stdRetry noNotification) Nothing -> dlwith $
download rmt k (AssociatedFile Nothing) stdRetry noNotification
Just locs -> dlwith $
anyM getexport locs
where
dlwith a = ifM a
( decodeBS <$> calcRepo (gitAnnexLocation k) ( decodeBS <$> calcRepo (gitAnnexLocation k)
, giveup $ "Failed to download " ++ serializeKey k , giveup $ "Failed to download " ++ serializeKey k
) )
-- Uploads a git bundle from the annex objects directory to the remote. getexport loc = catchNonAsync (getexport' loc) (const (pure False))
getexport' loc =
getViaTmp rsp vc k (AssociatedFile Nothing) Nothing $ \tmp -> do
v <- Remote.retrieveExport (Remote.exportActions rmt)
k loc (decodeBS tmp) nullMeterUpdate
return (True, v)
rsp = Remote.retrievalSecurityPolicy rmt
vc = Remote.RemoteVerify rmt
-- Uploads a bundle or manifest object from the annex objects directory
-- to the remote.
-- --
-- Throws errors if the upload fails. -- Throws errors if the upload fails.
-- --
-- This does not update the location log to indicate that the remote -- This does not update the location log to indicate that the remote
-- contains the git bundle object. -- contains the git object.
uploadGitBundle :: Remote -> Key -> Annex () uploadGitObject :: Remote -> Key -> Annex ()
uploadGitBundle rmt k = uploadGitObject rmt k = getKeyExportLocations rmt k >>= \case
unlessM (upload rmt k (AssociatedFile Nothing) stdRetry noNotification) $ Just (loc:_) -> do
giveup $ "Failed to upload " ++ serializeKey k objfile <- fromRawFilePath <$> calcRepo (gitAnnexLocation k)
Remote.storeExport (Remote.exportActions rmt) objfile k loc nullMeterUpdate
_ ->
unlessM (upload rmt k (AssociatedFile Nothing) retry noNotification) $
giveup $ "Failed to upload " ++ serializeKey k
where
retry = case fromKey keyVariety k of
GitBundleKey -> stdRetry
-- Manifest keys are not stable
_ -> noRetry
-- Generates a git bundle, ingests it into the local objects directory, -- Generates a git bundle, ingests it into the local objects directory,
-- and uploads its key to the special remote. -- and uploads its key to the special remote.
@ -689,12 +740,12 @@ generateAndUploadGitBundle rmt bs manifest =
unless (bundlekey `elem` (inManifest manifest)) $ do unless (bundlekey `elem` (inManifest manifest)) $ do
unlessM (moveAnnex bundlekey (AssociatedFile Nothing) (toRawFilePath tmp)) $ unlessM (moveAnnex bundlekey (AssociatedFile Nothing) (toRawFilePath tmp)) $
giveup "Unable to push" giveup "Unable to push"
uploadGitBundle rmt bundlekey uploadGitObject rmt bundlekey
`onException` unlinkAnnex bundlekey `onException` unlinkAnnex bundlekey
return bundlekey return bundlekey
dropKey :: Remote -> Key -> Annex Bool dropKey :: Remote -> Key -> Annex Bool
dropKey rmt k = tryNonAsync (Remote.removeKey rmt k) >>= \case dropKey rmt k = tryNonAsync (dropKey' rmt k) >>= \case
Right () -> return True Right () -> return True
Left ex -> do Left ex -> do
liftIO $ hPutStrLn stderr $ liftIO $ hPutStrLn stderr $
@ -703,6 +754,49 @@ dropKey rmt k = tryNonAsync (Remote.removeKey rmt k) >>= \case
++ " (" ++ show ex ++ ")" ++ " (" ++ show ex ++ ")"
return False return False
dropKey' :: Remote -> Key -> Annex ()
dropKey' rmt k = getKeyExportLocations rmt k >>= \case
Nothing -> Remote.removeKey rmt k
Just locs -> forM_ locs $ \loc ->
Remote.removeExport (Remote.exportActions rmt) k loc
getKeyExportLocations :: Remote -> Key -> Annex (Maybe [ExportLocation])
getKeyExportLocations rmt k = do
cfg <- Annex.getGitConfig
u <- getUUID
return $ keyExportLocations rmt k cfg u
-- When the remote contains a tree, the git keys are stored
-- inside the .git/annex/objects/ directory in the remote.
--
-- The first ExportLocation in the returned list is the one that
-- is the same as the local repository would use. But it's possible
-- that one of the others in the list was used by another repository to
-- upload a git key.
keyExportLocations :: Remote -> Key -> GitConfig -> UUID -> Maybe [ExportLocation]
keyExportLocations rmt k cfg uuid
| exportTree (Remote.config rmt) || importTree (Remote.config rmt) =
Just $ map (\p -> mkExportLocation (".git" P.</> p)) $
concatMap (`annexLocationsNonBare` k) cfgs
| otherwise = Nothing
where
-- When git-annex has not been initialized yet (eg, when cloning),
-- the Differences are unknown, so make a version of the GitConfig
-- with and without the OneLevelObjectHash difference.
cfgs
| uuid /= NoUUID = [cfg]
| hasDifference OneLevelObjectHash (annexDifferences cfg) =
[ cfg
, cfg { annexDifferences = mempty }
]
| otherwise =
[ cfg
, cfg
{ annexDifferences = mkDifferences
(S.singleton OneLevelObjectHash)
}
]
-- Tracking refs are used to remember the refs that are currently on the -- Tracking refs are used to remember the refs that are currently on the
-- remote. This is different from git's remote tracking branches, since it -- remote. This is different from git's remote tracking branches, since it
-- needs to track all refs on the remote, not only the refs that the user -- needs to track all refs on the remote, not only the refs that the user

View file

@ -17,6 +17,7 @@ module Types.Difference (
differenceConfigVal, differenceConfigVal,
hasDifference, hasDifference,
listDifferences, listDifferences,
mkDifferences,
) where ) where
import Utility.PartialPrelude import Utility.PartialPrelude

View file

@ -18,6 +18,11 @@ and are in the process of being deleted.
(Lines end with unix `"\n"`, not `"\r\n"`.) (Lines end with unix `"\n"`, not `"\r\n"`.)
# exporttree=yes remotes
In an exporttree=yes remote, the GITMANIFEST and GITBUNDLE objects are
stored in the remote, under the `.git/annex/objects/` path.
# multiple GITMANIFEST files # multiple GITMANIFEST files
Usually there will only be one per special remote, but it's possible for Usually there will only be one per special remote, but it's possible for
@ -38,6 +43,6 @@ stored in such a special remote, this procedure will work:
(Note that later bundles can update refs from the versions in previous (Note that later bundles can update refs from the versions in previous
bundles.) bundles.)
When the special remote is encryptee, the GITMANIFEST and GITBUNDLE will When the special remote is encrypted, the GITMANIFEST and GITBUNDLE will
also be encrypted. To decrypt those manually, see this also be encrypted. To decrypt those manually, see this
[[fairly simple shell script using standard tools|tips/Decrypting_files_in_special_remotes_without_git-annex]]. [[fairly simple shell script using standard tools|tips/Decrypting_files_in_special_remotes_without_git-annex]].

View file

@ -12,7 +12,7 @@ This is implememented and working. Remaining todo list for it:
* Need to test all types of pushes, barely tested at all. * Need to test all types of pushes, barely tested at all.
* Support exporttree=yes remotes. * Need to test exporttree=yes remotes.
* Support max-bundles config * Support max-bundles config
@ -35,7 +35,7 @@ This is implememented and working. Remaining todo list for it:
where the remote is left with a deleted manifest when a push where the remote is left with a deleted manifest when a push
is interrupted part way through. This should be recoverable is interrupted part way through. This should be recoverable
by caching the manifest locally and re-uploading it when by caching the manifest locally and re-uploading it when
the remote has no manifest. the remote has no manifest or prompting the user to merge and re-push.
* datalad-annex supports cloning from the web special remote, * datalad-annex supports cloning from the web special remote,
using an url that contains the result of pushing to eg, a directory using an url that contains the result of pushing to eg, a directory
@ -87,3 +87,7 @@ This is implememented and working. Remaining todo list for it:
This should be fixable by making git-remote-annex not write to the This should be fixable by making git-remote-annex not write to the
git-annex branch, but to eg, a temporary journal directory. git-annex branch, but to eg, a temporary journal directory.
Also, when the remote uses importree=yes, pushing to it updates
content identifiers, which currently get recorded in the git-annex
branch. It would be good to avoid that being written as well.