git-remote-annex support exporttree=yes remotes
Put the annex objects in .git/annex/objects/ inside the export remote. This way, when importing from the remote, they will be filtered out. Note that, when importtree=yes, content identifiers are used, and this means that pushing to a remote updates the git-annex branch. Urk. Will need to try to prevent that later, but I already had a todo about that for other reasons. Untested! Sponsored-By: Brock Spratlen on Patreon
This commit is contained in:
parent
3f848564ac
commit
34eae54ff9
4 changed files with 151 additions and 47 deletions
|
@ -25,15 +25,19 @@ import qualified Types.Remote as Remote
|
||||||
import Annex.Transfer
|
import Annex.Transfer
|
||||||
import Backend.GitRemoteAnnex
|
import Backend.GitRemoteAnnex
|
||||||
import Config
|
import Config
|
||||||
|
import Types.Key
|
||||||
import Types.RemoteConfig
|
import Types.RemoteConfig
|
||||||
import Types.ProposedAccepted
|
import Types.ProposedAccepted
|
||||||
import Types.Key
|
import Types.Export
|
||||||
import Types.GitConfig
|
import Types.GitConfig
|
||||||
|
import Types.Difference
|
||||||
import Git.Types
|
import Git.Types
|
||||||
import Logs.Difference
|
import Logs.Difference
|
||||||
import Annex.Init
|
import Annex.Init
|
||||||
|
import Annex.UUID
|
||||||
import Annex.Content
|
import Annex.Content
|
||||||
import Annex.Perms
|
import Annex.Perms
|
||||||
|
import Annex.SpecialRemote.Config
|
||||||
import Remote.List
|
import Remote.List
|
||||||
import Remote.List.Util
|
import Remote.List.Util
|
||||||
import Utility.Tmp
|
import Utility.Tmp
|
||||||
|
@ -45,8 +49,9 @@ import Data.Either
|
||||||
import qualified Data.ByteString as B
|
import qualified Data.ByteString as B
|
||||||
import qualified Data.ByteString.Char8 as B8
|
import qualified Data.ByteString.Char8 as B8
|
||||||
import qualified Data.Map.Strict as M
|
import qualified Data.Map.Strict as M
|
||||||
import System.FilePath.ByteString as P
|
import qualified System.FilePath.ByteString as P
|
||||||
import qualified Utility.RawFilePath as R
|
import qualified Utility.RawFilePath as R
|
||||||
|
import qualified Data.Set as S
|
||||||
|
|
||||||
run :: [String] -> IO ()
|
run :: [String] -> IO ()
|
||||||
run (remotename:url:[]) =
|
run (remotename:url:[]) =
|
||||||
|
@ -526,6 +531,28 @@ getEnabledSpecialRemoteByName remotename =
|
||||||
maybe (return (Just rmt)) giveup
|
maybe (return (Just rmt)) giveup
|
||||||
(checkSpecialRemoteProblems rmt)
|
(checkSpecialRemoteProblems rmt)
|
||||||
|
|
||||||
|
parseManifest :: B.ByteString -> Either String Manifest
|
||||||
|
parseManifest b =
|
||||||
|
let (outks, inks) = partitionEithers $ map parseline $ B8.lines b
|
||||||
|
in case (checkvalid [] inks, checkvalid [] outks) of
|
||||||
|
(Right inks', Right outks') ->
|
||||||
|
Right $ mkManifest inks' outks'
|
||||||
|
(Left err, _) -> Left err
|
||||||
|
(_, Left err) -> Left err
|
||||||
|
where
|
||||||
|
parseline l
|
||||||
|
| "-" `B.isPrefixOf` l =
|
||||||
|
Left $ deserializeKey' $ B.drop 1 l
|
||||||
|
| otherwise =
|
||||||
|
Right $ deserializeKey' l
|
||||||
|
|
||||||
|
checkvalid c [] = Right (reverse c)
|
||||||
|
checkvalid c (Just k:ks) = case fromKey keyVariety k of
|
||||||
|
GitBundleKey -> checkvalid (k:c) ks
|
||||||
|
_ -> Left $ "Wrong type of key in manifest " ++ serializeKey k
|
||||||
|
checkvalid _ (Nothing:_) =
|
||||||
|
Left "Error parsing manifest"
|
||||||
|
|
||||||
-- Avoid using special remotes that are thirdparty populated, because
|
-- Avoid using special remotes that are thirdparty populated, because
|
||||||
-- there is no way to push the git repository keys into one.
|
-- there is no way to push the git repository keys into one.
|
||||||
--
|
--
|
||||||
|
@ -555,38 +582,39 @@ downloadManifestOrFail rmt =
|
||||||
-- Throws errors if the remote cannot be accessed or the download fails,
|
-- Throws errors if the remote cannot be accessed or the download fails,
|
||||||
-- or if the manifest file cannot be parsed.
|
-- or if the manifest file cannot be parsed.
|
||||||
downloadManifest :: Remote -> Annex (Maybe Manifest)
|
downloadManifest :: Remote -> Annex (Maybe Manifest)
|
||||||
downloadManifest rmt = ifM (Remote.checkPresent rmt mk)
|
downloadManifest rmt = getKeyExportLocations rmt mk >>= \case
|
||||||
-- Downloads to a temporary file, rather than using
|
Nothing -> ifM (Remote.checkPresent rmt mk)
|
||||||
-- the usual Annex.Transfer.download. The content of manifests is
|
( gettotmp $ \tmp ->
|
||||||
-- not stable, and so it needs to re-download it fresh every time.
|
Remote.retrieveKeyFile rmt mk
|
||||||
( withTmpFile "GITMANIFEST" $ \tmp tmph -> do
|
(AssociatedFile Nothing) tmp
|
||||||
liftIO $ hClose tmph
|
nullMeterUpdate Remote.NoVerify
|
||||||
_ <- Remote.retrieveKeyFile rmt mk
|
, return Nothing
|
||||||
(AssociatedFile Nothing) tmp
|
)
|
||||||
nullMeterUpdate Remote.NoVerify
|
Just locs -> getexport locs
|
||||||
(outks, inks) <- partitionEithers . map parseline . B8.lines
|
|
||||||
<$> liftIO (B.readFile tmp)
|
|
||||||
m <- mkManifest
|
|
||||||
<$> checkvalid [] inks
|
|
||||||
<*> checkvalid [] outks
|
|
||||||
return (Just m)
|
|
||||||
, return Nothing
|
|
||||||
)
|
|
||||||
where
|
where
|
||||||
mk = genManifestKey (Remote.uuid rmt)
|
mk = genManifestKey (Remote.uuid rmt)
|
||||||
|
|
||||||
checkvalid c [] = return (reverse c)
|
-- Downloads to a temporary file, rather than using eg
|
||||||
checkvalid c (Just k:ks) = case fromKey keyVariety k of
|
-- Annex.Transfer.download that would put it in the object
|
||||||
GitBundleKey -> checkvalid (k:c) ks
|
-- directory. The content of manifests is not stable, and so
|
||||||
_ -> giveup $ "Wrong type of key in manifest " ++ serializeKey k
|
-- it needs to re-download it fresh every time, and the object
|
||||||
checkvalid _ (Nothing:_) =
|
-- file should not be stored locally.
|
||||||
giveup $ "Error parsing manifest " ++ serializeKey mk
|
gettotmp dl = withTmpFile "GITMANIFEST" $ \tmp tmph -> do
|
||||||
|
liftIO $ hClose tmph
|
||||||
|
_ <- dl tmp
|
||||||
|
b <- liftIO (B.readFile tmp)
|
||||||
|
case parseManifest b of
|
||||||
|
Right m -> return (Just m)
|
||||||
|
Left err -> giveup err
|
||||||
|
|
||||||
parseline l
|
getexport [] = return Nothing
|
||||||
| "-" `B.isPrefixOf` l =
|
getexport (loc:locs) =
|
||||||
Left $ deserializeKey' $ B.drop 1 l
|
ifM (Remote.checkPresentExport (Remote.exportActions rmt) mk loc)
|
||||||
| otherwise =
|
( gettotmp $ \tmp ->
|
||||||
Right $ deserializeKey' l
|
Remote.retrieveExport (Remote.exportActions rmt)
|
||||||
|
mk loc tmp nullMeterUpdate
|
||||||
|
, getexport locs
|
||||||
|
)
|
||||||
|
|
||||||
-- Uploads the Manifest to the remote.
|
-- Uploads the Manifest to the remote.
|
||||||
--
|
--
|
||||||
|
@ -610,7 +638,7 @@ uploadManifest rmt manifest =
|
||||||
B8.hPutStrLn tmph (serializeKey' bundlekey)
|
B8.hPutStrLn tmph (serializeKey' bundlekey)
|
||||||
liftIO $ hClose tmph
|
liftIO $ hClose tmph
|
||||||
-- Remove old manifest if present.
|
-- Remove old manifest if present.
|
||||||
Remote.removeKey rmt mk
|
dropKey' rmt mk
|
||||||
-- storeKey needs the key to be in the annex objects
|
-- storeKey needs the key to be in the annex objects
|
||||||
-- directory, so put the manifest file there temporarily.
|
-- directory, so put the manifest file there temporarily.
|
||||||
-- Using linkOrCopy rather than moveAnnex to avoid updating
|
-- Using linkOrCopy rather than moveAnnex to avoid updating
|
||||||
|
@ -622,9 +650,8 @@ uploadManifest rmt manifest =
|
||||||
linkOrCopy mk (toRawFilePath tmp) objfile Nothing
|
linkOrCopy mk (toRawFilePath tmp) objfile Nothing
|
||||||
unless (isJust res)
|
unless (isJust res)
|
||||||
uploadfailed
|
uploadfailed
|
||||||
-- noRetry because manifest content is not stable
|
ok <- (uploadGitObject rmt mk >> pure True)
|
||||||
ok <- upload rmt mk (AssociatedFile Nothing)
|
`catchNonAsync` (const (pure False))
|
||||||
noRetry noNotification
|
|
||||||
-- Don't leave the manifest key in the annex objects
|
-- Don't leave the manifest key in the annex objects
|
||||||
-- directory.
|
-- directory.
|
||||||
unlinkAnnex mk
|
unlinkAnnex mk
|
||||||
|
@ -650,22 +677,46 @@ uploadManifest rmt manifest =
|
||||||
-- 3. Git bundle objects are not usually transferred between repositories
|
-- 3. Git bundle objects are not usually transferred between repositories
|
||||||
-- except special remotes (although the user can if they want to).
|
-- except special remotes (although the user can if they want to).
|
||||||
downloadGitBundle :: Remote -> Key -> Annex FilePath
|
downloadGitBundle :: Remote -> Key -> Annex FilePath
|
||||||
downloadGitBundle rmt k =
|
downloadGitBundle rmt k = getKeyExportLocations rmt k >>= \case
|
||||||
ifM (download rmt k (AssociatedFile Nothing) stdRetry noNotification)
|
Nothing -> dlwith $
|
||||||
|
download rmt k (AssociatedFile Nothing) stdRetry noNotification
|
||||||
|
Just locs -> dlwith $
|
||||||
|
anyM getexport locs
|
||||||
|
where
|
||||||
|
dlwith a = ifM a
|
||||||
( decodeBS <$> calcRepo (gitAnnexLocation k)
|
( decodeBS <$> calcRepo (gitAnnexLocation k)
|
||||||
, giveup $ "Failed to download " ++ serializeKey k
|
, giveup $ "Failed to download " ++ serializeKey k
|
||||||
)
|
)
|
||||||
|
|
||||||
-- Uploads a git bundle from the annex objects directory to the remote.
|
getexport loc = catchNonAsync (getexport' loc) (const (pure False))
|
||||||
|
getexport' loc =
|
||||||
|
getViaTmp rsp vc k (AssociatedFile Nothing) Nothing $ \tmp -> do
|
||||||
|
v <- Remote.retrieveExport (Remote.exportActions rmt)
|
||||||
|
k loc (decodeBS tmp) nullMeterUpdate
|
||||||
|
return (True, v)
|
||||||
|
rsp = Remote.retrievalSecurityPolicy rmt
|
||||||
|
vc = Remote.RemoteVerify rmt
|
||||||
|
|
||||||
|
-- Uploads a bundle or manifest object from the annex objects directory
|
||||||
|
-- to the remote.
|
||||||
--
|
--
|
||||||
-- Throws errors if the upload fails.
|
-- Throws errors if the upload fails.
|
||||||
--
|
--
|
||||||
-- This does not update the location log to indicate that the remote
|
-- This does not update the location log to indicate that the remote
|
||||||
-- contains the git bundle object.
|
-- contains the git object.
|
||||||
uploadGitBundle :: Remote -> Key -> Annex ()
|
uploadGitObject :: Remote -> Key -> Annex ()
|
||||||
uploadGitBundle rmt k =
|
uploadGitObject rmt k = getKeyExportLocations rmt k >>= \case
|
||||||
unlessM (upload rmt k (AssociatedFile Nothing) stdRetry noNotification) $
|
Just (loc:_) -> do
|
||||||
giveup $ "Failed to upload " ++ serializeKey k
|
objfile <- fromRawFilePath <$> calcRepo (gitAnnexLocation k)
|
||||||
|
Remote.storeExport (Remote.exportActions rmt) objfile k loc nullMeterUpdate
|
||||||
|
_ ->
|
||||||
|
unlessM (upload rmt k (AssociatedFile Nothing) retry noNotification) $
|
||||||
|
giveup $ "Failed to upload " ++ serializeKey k
|
||||||
|
where
|
||||||
|
retry = case fromKey keyVariety k of
|
||||||
|
GitBundleKey -> stdRetry
|
||||||
|
-- Manifest keys are not stable
|
||||||
|
_ -> noRetry
|
||||||
|
|
||||||
-- Generates a git bundle, ingests it into the local objects directory,
|
-- Generates a git bundle, ingests it into the local objects directory,
|
||||||
-- and uploads its key to the special remote.
|
-- and uploads its key to the special remote.
|
||||||
|
@ -689,12 +740,12 @@ generateAndUploadGitBundle rmt bs manifest =
|
||||||
unless (bundlekey `elem` (inManifest manifest)) $ do
|
unless (bundlekey `elem` (inManifest manifest)) $ do
|
||||||
unlessM (moveAnnex bundlekey (AssociatedFile Nothing) (toRawFilePath tmp)) $
|
unlessM (moveAnnex bundlekey (AssociatedFile Nothing) (toRawFilePath tmp)) $
|
||||||
giveup "Unable to push"
|
giveup "Unable to push"
|
||||||
uploadGitBundle rmt bundlekey
|
uploadGitObject rmt bundlekey
|
||||||
`onException` unlinkAnnex bundlekey
|
`onException` unlinkAnnex bundlekey
|
||||||
return bundlekey
|
return bundlekey
|
||||||
|
|
||||||
dropKey :: Remote -> Key -> Annex Bool
|
dropKey :: Remote -> Key -> Annex Bool
|
||||||
dropKey rmt k = tryNonAsync (Remote.removeKey rmt k) >>= \case
|
dropKey rmt k = tryNonAsync (dropKey' rmt k) >>= \case
|
||||||
Right () -> return True
|
Right () -> return True
|
||||||
Left ex -> do
|
Left ex -> do
|
||||||
liftIO $ hPutStrLn stderr $
|
liftIO $ hPutStrLn stderr $
|
||||||
|
@ -703,6 +754,49 @@ dropKey rmt k = tryNonAsync (Remote.removeKey rmt k) >>= \case
|
||||||
++ " (" ++ show ex ++ ")"
|
++ " (" ++ show ex ++ ")"
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
dropKey' :: Remote -> Key -> Annex ()
|
||||||
|
dropKey' rmt k = getKeyExportLocations rmt k >>= \case
|
||||||
|
Nothing -> Remote.removeKey rmt k
|
||||||
|
Just locs -> forM_ locs $ \loc ->
|
||||||
|
Remote.removeExport (Remote.exportActions rmt) k loc
|
||||||
|
|
||||||
|
getKeyExportLocations :: Remote -> Key -> Annex (Maybe [ExportLocation])
|
||||||
|
getKeyExportLocations rmt k = do
|
||||||
|
cfg <- Annex.getGitConfig
|
||||||
|
u <- getUUID
|
||||||
|
return $ keyExportLocations rmt k cfg u
|
||||||
|
|
||||||
|
-- When the remote contains a tree, the git keys are stored
|
||||||
|
-- inside the .git/annex/objects/ directory in the remote.
|
||||||
|
--
|
||||||
|
-- The first ExportLocation in the returned list is the one that
|
||||||
|
-- is the same as the local repository would use. But it's possible
|
||||||
|
-- that one of the others in the list was used by another repository to
|
||||||
|
-- upload a git key.
|
||||||
|
keyExportLocations :: Remote -> Key -> GitConfig -> UUID -> Maybe [ExportLocation]
|
||||||
|
keyExportLocations rmt k cfg uuid
|
||||||
|
| exportTree (Remote.config rmt) || importTree (Remote.config rmt) =
|
||||||
|
Just $ map (\p -> mkExportLocation (".git" P.</> p)) $
|
||||||
|
concatMap (`annexLocationsNonBare` k) cfgs
|
||||||
|
| otherwise = Nothing
|
||||||
|
where
|
||||||
|
-- When git-annex has not been initialized yet (eg, when cloning),
|
||||||
|
-- the Differences are unknown, so make a version of the GitConfig
|
||||||
|
-- with and without the OneLevelObjectHash difference.
|
||||||
|
cfgs
|
||||||
|
| uuid /= NoUUID = [cfg]
|
||||||
|
| hasDifference OneLevelObjectHash (annexDifferences cfg) =
|
||||||
|
[ cfg
|
||||||
|
, cfg { annexDifferences = mempty }
|
||||||
|
]
|
||||||
|
| otherwise =
|
||||||
|
[ cfg
|
||||||
|
, cfg
|
||||||
|
{ annexDifferences = mkDifferences
|
||||||
|
(S.singleton OneLevelObjectHash)
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
-- Tracking refs are used to remember the refs that are currently on the
|
-- Tracking refs are used to remember the refs that are currently on the
|
||||||
-- remote. This is different from git's remote tracking branches, since it
|
-- remote. This is different from git's remote tracking branches, since it
|
||||||
-- needs to track all refs on the remote, not only the refs that the user
|
-- needs to track all refs on the remote, not only the refs that the user
|
||||||
|
|
|
@ -17,6 +17,7 @@ module Types.Difference (
|
||||||
differenceConfigVal,
|
differenceConfigVal,
|
||||||
hasDifference,
|
hasDifference,
|
||||||
listDifferences,
|
listDifferences,
|
||||||
|
mkDifferences,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Utility.PartialPrelude
|
import Utility.PartialPrelude
|
||||||
|
|
|
@ -18,6 +18,11 @@ and are in the process of being deleted.
|
||||||
|
|
||||||
(Lines end with unix `"\n"`, not `"\r\n"`.)
|
(Lines end with unix `"\n"`, not `"\r\n"`.)
|
||||||
|
|
||||||
|
# exporttree=yes remotes
|
||||||
|
|
||||||
|
In an exporttree=yes remote, the GITMANIFEST and GITBUNDLE objects are
|
||||||
|
stored in the remote, under the `.git/annex/objects/` path.
|
||||||
|
|
||||||
# multiple GITMANIFEST files
|
# multiple GITMANIFEST files
|
||||||
|
|
||||||
Usually there will only be one per special remote, but it's possible for
|
Usually there will only be one per special remote, but it's possible for
|
||||||
|
@ -38,6 +43,6 @@ stored in such a special remote, this procedure will work:
|
||||||
(Note that later bundles can update refs from the versions in previous
|
(Note that later bundles can update refs from the versions in previous
|
||||||
bundles.)
|
bundles.)
|
||||||
|
|
||||||
When the special remote is encryptee, the GITMANIFEST and GITBUNDLE will
|
When the special remote is encrypted, the GITMANIFEST and GITBUNDLE will
|
||||||
also be encrypted. To decrypt those manually, see this
|
also be encrypted. To decrypt those manually, see this
|
||||||
[[fairly simple shell script using standard tools|tips/Decrypting_files_in_special_remotes_without_git-annex]].
|
[[fairly simple shell script using standard tools|tips/Decrypting_files_in_special_remotes_without_git-annex]].
|
||||||
|
|
|
@ -12,7 +12,7 @@ This is implememented and working. Remaining todo list for it:
|
||||||
|
|
||||||
* Need to test all types of pushes, barely tested at all.
|
* Need to test all types of pushes, barely tested at all.
|
||||||
|
|
||||||
* Support exporttree=yes remotes.
|
* Need to test exporttree=yes remotes.
|
||||||
|
|
||||||
* Support max-bundles config
|
* Support max-bundles config
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ This is implememented and working. Remaining todo list for it:
|
||||||
where the remote is left with a deleted manifest when a push
|
where the remote is left with a deleted manifest when a push
|
||||||
is interrupted part way through. This should be recoverable
|
is interrupted part way through. This should be recoverable
|
||||||
by caching the manifest locally and re-uploading it when
|
by caching the manifest locally and re-uploading it when
|
||||||
the remote has no manifest.
|
the remote has no manifest or prompting the user to merge and re-push.
|
||||||
|
|
||||||
* datalad-annex supports cloning from the web special remote,
|
* datalad-annex supports cloning from the web special remote,
|
||||||
using an url that contains the result of pushing to eg, a directory
|
using an url that contains the result of pushing to eg, a directory
|
||||||
|
@ -87,3 +87,7 @@ This is implememented and working. Remaining todo list for it:
|
||||||
|
|
||||||
This should be fixable by making git-remote-annex not write to the
|
This should be fixable by making git-remote-annex not write to the
|
||||||
git-annex branch, but to eg, a temporary journal directory.
|
git-annex branch, but to eg, a temporary journal directory.
|
||||||
|
|
||||||
|
Also, when the remote uses importree=yes, pushing to it updates
|
||||||
|
content identifiers, which currently get recorded in the git-annex
|
||||||
|
branch. It would be good to avoid that being written as well.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue