Merge branch 'git-remote-annex'

This commit is contained in:
Joey Hess 2024-05-15 17:57:50 -04:00
commit 434a88c368
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
37 changed files with 1575 additions and 92 deletions

1
.gitignore vendored
View file

@ -13,6 +13,7 @@ Build/BuildVersion
Build/MakeMans
git-annex
git-annex-shell
git-remote-annex
man
git-union-merge
git-union-merge.1

View file

@ -727,7 +727,8 @@ stageJournal :: JournalLocked -> Annex () -> Annex ()
stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do
prepareModifyIndex jl
g <- gitRepo
let dir = gitAnnexJournalDir g
st <- getState
let dir = gitAnnexJournalDir st g
(jlogf, jlogh) <- openjlog (fromRawFilePath tmpdir)
withHashObjectHandle $ \h ->
withJournalHandle gitAnnexJournalDir $ \jh ->

View file

@ -1,6 +1,6 @@
{- git-annex repository initialization
-
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -12,6 +12,7 @@ module Annex.Init (
checkInitializeAllowed,
ensureInitialized,
autoInitialize,
autoInitialize',
isInitialized,
initialize,
initialize',
@ -256,10 +257,13 @@ guardSafeToUseRepo a = ifM (inRepo Git.Config.checkRepoConfigInaccessible)
- Checks repository version and handles upgrades too.
-}
autoInitialize :: Annex [Remote] -> Annex ()
autoInitialize remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade
autoInitialize = autoInitialize' autoInitializeAllowed
autoInitialize' :: Annex Bool -> Annex [Remote] -> Annex ()
autoInitialize' check remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade
where
needsinit =
whenM (initializeAllowed <&&> autoInitializeAllowed) $ do
whenM (initializeAllowed <&&> check) $ do
initialize Nothing Nothing
autoEnableSpecialRemotes remotelist

View file

@ -7,7 +7,7 @@
- All files in the journal must be a series of lines separated by
- newlines.
-
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -23,6 +23,8 @@ import qualified Git
import Annex.Perms
import Annex.Tmp
import Annex.LockFile
import Annex.BranchState
import Types.BranchState
import Utility.Directory.Stream
import qualified Utility.RawFilePath as R
@ -82,9 +84,10 @@ privateUUIDsKnown' = not . S.null . annexPrivateRepos . Annex.gitconfig
-}
setJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex ()
setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do
st <- getState
jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
( return gitAnnexPrivateJournalDir
, return gitAnnexJournalDir
( return (gitAnnexPrivateJournalDir st)
, return (gitAnnexJournalDir st)
)
-- journal file is written atomically
let jfile = journalFile file
@ -106,9 +109,10 @@ newtype AppendableJournalFile = AppendableJournalFile (RawFilePath, RawFilePath)
- branch. -}
checkCanAppendJournalFile :: JournalLocked -> RegardingUUID -> RawFilePath -> Annex (Maybe AppendableJournalFile)
checkCanAppendJournalFile _jl ru file = do
st <- getState
jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
( return gitAnnexPrivateJournalDir
, return gitAnnexJournalDir
( return (gitAnnexPrivateJournalDir st)
, return (gitAnnexJournalDir st)
)
let jfile = jd P.</> journalFile file
ifM (liftIO $ R.doesPathExist jfile)
@ -176,14 +180,12 @@ data GetPrivate = GetPrivate Bool
-}
getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent
getJournalFileStale (GetPrivate getprivate) file = do
-- Optimisation to avoid a second MVar access.
st <- Annex.getState id
let g = Annex.repo st
liftIO $
if getprivate && privateUUIDsKnown' st
then do
x <- getfrom (gitAnnexJournalDir g)
getfrom (gitAnnexPrivateJournalDir g) >>= \case
x <- getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st))
getfrom (gitAnnexPrivateJournalDir (Annex.branchstate st) (Annex.repo st)) >>= \case
Nothing -> return $ case x of
Nothing -> NoJournalledContent
Just b -> JournalledContent b
@ -193,7 +195,7 @@ getJournalFileStale (GetPrivate getprivate) file = do
-- happens in a merge of two
-- git-annex branches.
Just x' -> x' <> y
else getfrom (gitAnnexJournalDir g) >>= return . \case
else getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st)) >>= return . \case
Nothing -> NoJournalledContent
Just b -> JournalledContent b
where
@ -219,18 +221,20 @@ discardIncompleteAppend v
{- List of existing journal files in a journal directory, but without locking,
- may miss new ones just being added, or may have false positives if the
- journal is staged as it is run. -}
getJournalledFilesStale :: (Git.Repo -> RawFilePath) -> Annex [RawFilePath]
getJournalledFilesStale :: (BranchState -> Git.Repo -> RawFilePath) -> Annex [RawFilePath]
getJournalledFilesStale getjournaldir = do
g <- gitRepo
fs <- liftIO $ catchDefaultIO [] $
getDirectoryContents $ fromRawFilePath (getjournaldir g)
st <- Annex.getState id
let d = getjournaldir (Annex.branchstate st) (Annex.repo st)
fs <- liftIO $ catchDefaultIO [] $
getDirectoryContents (fromRawFilePath d)
return $ filter (`notElem` [".", ".."]) $
map (fileJournal . toRawFilePath) fs
{- Directory handle open on a journal directory. -}
withJournalHandle :: (Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a
withJournalHandle :: (BranchState -> Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a
withJournalHandle getjournaldir a = do
d <- fromRepo getjournaldir
st <- Annex.getState id
let d = getjournaldir (Annex.branchstate st) (Annex.repo st)
bracket (opendir d) (liftIO . closeDirectory) (liftIO . a)
where
-- avoid overhead of creating the journal directory when it already
@ -239,9 +243,10 @@ withJournalHandle getjournaldir a = do
`catchIO` (const (createAnnexDirectory d >> opendir d))
{- Checks if there are changes in the journal. -}
journalDirty :: (Git.Repo -> RawFilePath) -> Annex Bool
journalDirty :: (BranchState -> Git.Repo -> RawFilePath) -> Annex Bool
journalDirty getjournaldir = do
d <- fromRawFilePath <$> fromRepo getjournaldir
st <- getState
d <- fromRawFilePath <$> fromRepo (getjournaldir st)
liftIO $
(not <$> isDirectoryEmpty d)
`catchIO` (const $ doesDirectoryExist d)

View file

@ -118,6 +118,7 @@ import Key
import Types.UUID
import Types.GitConfig
import Types.Difference
import Types.BranchState
import qualified Git
import qualified Git.Types as Git
import Git.FilePath
@ -528,15 +529,19 @@ gitAnnexTransferDir r =
{- .git/annex/journal/ is used to journal changes made to the git-annex
- branch -}
gitAnnexJournalDir :: Git.Repo -> RawFilePath
gitAnnexJournalDir r =
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "journal"
gitAnnexJournalDir :: BranchState -> Git.Repo -> RawFilePath
gitAnnexJournalDir st r = P.addTrailingPathSeparator $
case alternateJournal st of
Nothing -> gitAnnexDir r P.</> "journal"
Just d -> d
{- .git/annex/journal.private/ is used to journal changes regarding private
- repositories. -}
gitAnnexPrivateJournalDir :: Git.Repo -> RawFilePath
gitAnnexPrivateJournalDir r =
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "journal-private"
gitAnnexPrivateJournalDir :: BranchState -> Git.Repo -> RawFilePath
gitAnnexPrivateJournalDir st r = P.addTrailingPathSeparator $
case alternateJournal st of
Nothing -> gitAnnexDir r P.</> "journal-private"
Just d -> d
{- Lock file for the journal. -}
gitAnnexJournalLock :: Git.Repo -> RawFilePath

View file

@ -1,6 +1,6 @@
{- git-annex special remote configuration
-
- Copyright 2019-2023 Joey Hess <id@joeyh.name>
- Copyright 2019-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}

104
Backend/GitRemoteAnnex.hs Normal file
View file

@ -0,0 +1,104 @@
{- Backends for git-remote-annex.
-
- GITBUNDLE keys store git bundles
- GITMANIFEST keys store ordered lists of GITBUNDLE keys
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Backend.GitRemoteAnnex (
backends,
genGitBundleKey,
genManifestKey,
isGitRemoteAnnexKey,
) where
import Annex.Common
import Types.Key
import Types.Backend
import Utility.Hash
import Utility.Metered
import qualified Backend.Hash as Hash
import qualified Data.ByteString.Short as S
import qualified Data.ByteString.Char8 as B8
backends :: [Backend]
backends = [gitbundle, gitmanifest]
gitbundle :: Backend
gitbundle = Backend
{ backendVariety = GitBundleKey
, genKey = Nothing
-- ^ Not provided because these keys can only be generated by
-- git-remote-annex.
, verifyKeyContent = Just $ Hash.checkKeyChecksum sameCheckSum hash
, verifyKeyContentIncrementally = Just (liftIO . incrementalVerifier)
, canUpgradeKey = Nothing
, fastMigrate = Nothing
, isStableKey = const True
, isCryptographicallySecure = Hash.cryptographicallySecure hash
, isCryptographicallySecureKey = const $ pure $
Hash.cryptographicallySecure hash
}
gitmanifest :: Backend
gitmanifest = Backend
{ backendVariety = GitManifestKey
, genKey = Nothing
, verifyKeyContent = Nothing
, verifyKeyContentIncrementally = Nothing
, canUpgradeKey = Nothing
, fastMigrate = Nothing
, isStableKey = const True
, isCryptographicallySecure = False
, isCryptographicallySecureKey = const $ pure False
}
-- git bundle keys use the sha256 hash.
hash :: Hash.Hash
hash = Hash.SHA2Hash (HashSize 256)
incrementalVerifier :: Key -> IO IncrementalVerifier
incrementalVerifier =
mkIncrementalVerifier sha2_256_context "checksum" . sameCheckSum
sameCheckSum :: Key -> String -> Bool
sameCheckSum key s = s == expected
where
-- The checksum comes after a UUID.
expected = reverse $ takeWhile (/= '-') $ reverse $
decodeBS $ S.fromShort $ fromKey keyName key
genGitBundleKey :: UUID -> RawFilePath -> MeterUpdate -> Annex Key
genGitBundleKey remoteuuid file meterupdate = do
filesize <- liftIO $ getFileSize file
s <- Hash.hashFile hash file meterupdate
return $ mkKey $ \k -> k
{ keyName = S.toShort $ fromUUID remoteuuid <> "-" <> encodeBS s
, keyVariety = GitBundleKey
, keySize = Just filesize
}
genManifestKey :: UUID -> Key
genManifestKey u = mkKey $ \kd -> kd
{ keyName = S.toShort (fromUUID u)
, keyVariety = GitManifestKey
}
{- Is the key a manifest or bundle key that belongs to the special remote
- with this uuid? -}
isGitRemoteAnnexKey :: UUID -> Key -> Bool
isGitRemoteAnnexKey u k =
case fromKey keyVariety k of
GitBundleKey -> sameuuid $
-- Remove the checksum that comes after the UUID.
B8.dropEnd 1 . B8.dropWhileEnd (/= '-')
GitManifestKey -> sameuuid id
_ -> False
where
sameuuid f = fromUUID u == f (S.fromShort (fromKey keyName k))

View file

@ -1,6 +1,6 @@
{- git-annex hashing backends
-
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -12,6 +12,10 @@ module Backend.Hash (
testKeyBackend,
keyHash,
descChecksum,
Hash(..),
cryptographicallySecure,
hashFile,
checkKeyChecksum
) where
import Annex.Common
@ -77,7 +81,7 @@ genBackend :: Hash -> Backend
genBackend hash = Backend
{ backendVariety = hashKeyVariety hash (HasExt False)
, genKey = Just (keyValue hash)
, verifyKeyContent = Just $ checkKeyChecksum hash
, verifyKeyContent = Just $ checkKeyChecksum sameCheckSum hash
, verifyKeyContentIncrementally = Just $ checkKeyChecksumIncremental hash
, canUpgradeKey = Just needsUpgrade
, fastMigrate = Just trivialMigrate
@ -122,10 +126,10 @@ keyValueE hash source meterupdate =
keyValue hash source meterupdate
>>= addE source (const $ hashKeyVariety hash (HasExt True))
checkKeyChecksum :: Hash -> Key -> RawFilePath -> Annex Bool
checkKeyChecksum hash key file = catchIOErrorType HardwareFault hwfault $ do
checkKeyChecksum :: (Key -> String -> Bool) -> Hash -> Key -> RawFilePath -> Annex Bool
checkKeyChecksum issame hash key file = catchIOErrorType HardwareFault hwfault $ do
showAction (UnquotedString descChecksum)
sameCheckSum key
issame key
<$> hashFile hash file nullMeterUpdate
where
hwfault e = do

View file

@ -18,12 +18,14 @@ import qualified Backend.External
import qualified Backend.Hash
import qualified Backend.WORM
import qualified Backend.URL
import qualified Backend.GitRemoteAnnex
{- Regular backends. Does not include externals or VURL. -}
regularBackendList :: [Backend]
regularBackendList = Backend.Hash.backends
++ Backend.WORM.backends
++ Backend.URL.backends
++ Backend.GitRemoteAnnex.backends
{- The default hashing backend. -}
defaultHashBackend :: Backend

View file

@ -224,6 +224,7 @@ installGitAnnex topdir = go (topdir </> "bin")
error "strip failed"
createSymbolicLink "git-annex" (bindir </> "git-annex-shell")
createSymbolicLink "git-annex" (bindir </> "git-remote-tor-annex")
createSymbolicLink "git-annex" (bindir </> "git-remote-annex")
main :: IO ()
main = getArgs >>= go

View file

@ -1,5 +1,8 @@
git-annex (10.20240431) UNRELEASED; urgency=medium
* git-remote-annex: New program which allows pushing a git repo to a
git-annex special remote, and cloning from a special remote.
(Based on Michael Hanke's git-remote-datalad-annex.)
* fsck: Fix recent reversion that made it say it was checksumming files
whose content is not present.
* Avoid the --fast option preventing checksumming in some cases it

951
CmdLine/GitRemoteAnnex.hs Normal file
View file

@ -0,0 +1,951 @@
{- git-remote-annex program
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module CmdLine.GitRemoteAnnex where
import Annex.Common
import Types.GitRemoteAnnex
import qualified Annex
import qualified Remote
import qualified Git.CurrentRepo
import qualified Git.Ref
import qualified Git.Branch
import qualified Git.Bundle
import qualified Git.Remote
import qualified Git.Remote.Remove
import qualified Annex.SpecialRemote as SpecialRemote
import qualified Annex.Branch
import qualified Annex.BranchState
import qualified Types.Remote as Remote
import qualified Logs.Remote
import Remote.Helper.Encryptable (parseEncryptionMethod)
import Annex.Transfer
import Backend.GitRemoteAnnex
import Config
import Types.Key
import Types.RemoteConfig
import Types.ProposedAccepted
import Types.Export
import Types.GitConfig
import Types.BranchState
import Types.Difference
import Types.Crypto
import Git.Types
import Logs.Difference
import Annex.Init
import Annex.UUID
import Annex.Content
import Annex.Perms
import Annex.SpecialRemote.Config
import Remote.List
import Remote.List.Util
import Utility.Tmp
import Utility.Tmp.Dir
import Utility.Env
import Utility.Metered
import Network.URI
import Data.Either
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.Map.Strict as M
import qualified System.FilePath.ByteString as P
import qualified Utility.RawFilePath as R
import qualified Data.Set as S
run :: [String] -> IO ()
run (remotename:url:[]) =
-- git strips the "annex::" prefix of the url
-- when running this command, so add it back
let url' = "annex::" ++ url
in case parseSpecialRemoteNameUrl remotename url' of
Left e -> giveup e
Right src -> do
repo <- getRepo
state <- Annex.new repo
Annex.eval state (run' src)
run (_remotename:[]) = giveup "remote url not configured"
run _ = giveup "expected remote name and url parameters"
run' :: SpecialRemoteConfig -> Annex ()
run' src = do
sab <- startAnnexBranch
-- Prevent any usual git-annex output to stdout, because
-- the output of this command is being parsed by git.
doQuietAction $
withSpecialRemote src sab $ \rmt -> do
ls <- lines <$> liftIO getContents
go rmt ls emptyState
where
go rmt (l:ls) st =
let (c, v) = splitLine l
in case c of
"capabilities" -> capabilities >> go rmt ls st
"list" -> case v of
"" -> list st rmt False >>= go rmt ls
"for-push" -> list st rmt True >>= go rmt ls
_ -> protocolError l
"fetch" -> fetch st rmt (l:ls)
>>= \ls' -> go rmt ls' st
"push" -> push st rmt (l:ls)
>>= \(ls', st') -> go rmt ls' st'
"" -> return ()
_ -> protocolError l
go _ [] _ = return ()
data State = State
{ manifestCache :: Maybe Manifest
, trackingRefs :: M.Map Ref Sha
}
emptyState :: State
emptyState = State
{ manifestCache = Nothing
, trackingRefs = mempty
}
protocolError :: String -> a
protocolError l = giveup $ "gitremote-helpers protocol error at " ++ show l
capabilities :: Annex ()
capabilities = do
liftIO $ putStrLn "fetch"
liftIO $ putStrLn "push"
liftIO $ putStrLn ""
liftIO $ hFlush stdout
list :: State -> Remote -> Bool -> Annex State
list st rmt forpush = do
manifest <- if forpush
then downloadManifestWhenPresent rmt
else downloadManifestOrFail rmt
l <- forM (inManifest manifest) $ \k -> do
b <- downloadGitBundle rmt k
heads <- inRepo $ Git.Bundle.listHeads b
-- Get all the objects from the bundle. This is done here
-- so that the tracking refs can be updated with what is
-- listed, and so what when a full repush is done, all
-- objects are available to be pushed.
when forpush $
inRepo $ Git.Bundle.unbundle b
-- The bundle may contain tracking refs, or regular refs,
-- make sure we're operating on regular refs.
return $ map (\(s, r) -> (fromTrackingRef rmt r, s)) heads
-- Later refs replace earlier refs with the same name.
let refmap = M.fromList $ concat l
let reflist = M.toList refmap
let trackingrefmap = M.mapKeys (toTrackingRef rmt) refmap
-- When listing for a push, update the tracking refs to match what
-- was listed. This is necessary in order for a full repush to know
-- what to push.
when forpush $
updateTrackingRefs True rmt trackingrefmap
-- Respond to git with a list of refs.
liftIO $ do
forM_ reflist $ \(ref, sha) ->
B8.putStrLn $ fromRef' sha <> " " <> fromRef' ref
-- Newline terminates list of refs.
putStrLn ""
hFlush stdout
-- Remember the tracking refs and manifest.
return $ st
{ manifestCache = Just manifest
, trackingRefs = trackingrefmap
}
-- Any number of fetch commands can be sent by git, asking for specific
-- things. We fetch everything new at once, so find the end of the fetch
-- commands (which is supposed to be a blank line) before fetching.
fetch :: State -> Remote -> [String] -> Annex [String]
fetch st rmt (l:ls) = case splitLine l of
("fetch", _) -> fetch st rmt ls
("", _) -> do
fetch' st rmt
return ls
_ -> do
fetch' st rmt
return (l:ls)
fetch st rmt [] = do
fetch' st rmt
return []
fetch' :: State -> Remote -> Annex ()
fetch' st rmt = do
manifest <- maybe (downloadManifestOrFail rmt) pure (manifestCache st)
forM_ (inManifest manifest) $ \k ->
downloadGitBundle rmt k >>= inRepo . Git.Bundle.unbundle
-- Newline indicates end of fetch.
liftIO $ do
putStrLn ""
hFlush stdout
-- Note that the git bundles that are generated to push contain
-- tracking refs, rather than the actual refs that the user requested to
-- push. This is done because git bundle does not allow creating a bundle
-- that contains refs with different names than the ones in the git
-- repository. Consider eg, git push remote foo:bar, where the destination
-- ref is bar, but there may be no bar ref locally, or the bar ref may
-- be different than foo. If git bundle supported GIT_NAMESPACE, it would
-- be possible to generate a bundle that contains the specified refs.
push :: State -> Remote -> [String] -> Annex ([String], State)
push st rmt ls = do
let (refspecs, ls') = collectRefSpecs ls
(responses, trackingrefs) <- calc refspecs ([], trackingRefs st)
updateTrackingRefs False rmt trackingrefs
(ok, st') <- if M.null trackingrefs
then pushEmpty st rmt
else if any forcedPush refspecs
then fullPush st rmt (M.keys trackingrefs)
else incrementalPush st rmt
(trackingRefs st) trackingrefs
if ok
then do
sendresponses responses
return (ls', st' { trackingRefs = trackingrefs })
else do
-- Restore the old tracking refs
updateTrackingRefs True rmt (trackingRefs st)
sendresponses $
map (const "error push failed") refspecs
return (ls', st')
where
calc
:: [RefSpec]
-> ([B.ByteString], M.Map Ref Sha)
-> Annex ([B.ByteString], M.Map Ref Sha)
calc [] (responses, trackingrefs) =
return (reverse responses, trackingrefs)
calc (r:rs) (responses, trackingrefs) =
let tr = toTrackingRef rmt (dstRef r)
okresp m = pure
( ("ok " <> fromRef' (dstRef r)):responses
, m
)
errresp msg = pure
( ("error " <> fromRef' (dstRef r) <> " " <> msg):responses
, trackingrefs
)
in calc rs =<< case srcRef r of
Just srcref
| forcedPush r -> okresp $
M.insert tr srcref trackingrefs
| otherwise -> ifM (isfastforward srcref tr)
( okresp $
M.insert tr srcref trackingrefs
, errresp "non-fast-forward"
)
Nothing -> okresp $ M.delete tr trackingrefs
-- Check if the push is a fast-forward that will not overwrite work
-- in the ref currently stored in the remote. This seems redundant
-- to git's own checking for non-fast-forwards. But unfortunately,
-- before git push checks that, it actually tells us to push.
-- That seems likely to be a bug in git, and this is a workaround.
isfastforward newref tr = case M.lookup tr (trackingRefs st) of
Just prevsha -> inRepo $ Git.Ref.isAncestor prevsha newref
Nothing -> pure True
-- Send responses followed by newline to indicate end of push.
sendresponses responses = liftIO $ do
mapM_ B8.putStrLn responses
putStrLn ""
hFlush stdout
-- Full push of the specified refs to the remote.
-- All git bundle objects listed in the old manifest will be
-- deleted after successful upload of the new git bundle and manifest.
--
-- If this is interrupted, or loses access to the remote mid way through, it
-- will leave the remote with unused bundle keys on it, but every bundle
-- key listed in the manifest will exist, so it's in a consistent, usable
-- state.
--
-- However, the manifest is replaced by first dropping the object and then
-- uploading a new one. Interrupting that will leave the remote without a
-- manifest, which will appear as if all tracking branches were deleted
-- from it.
fullPush :: State -> Remote -> [Ref] -> Annex (Bool, State)
fullPush st rmt refs = guardPush st $ do
oldmanifest <- maybe (downloadManifestWhenPresent rmt) pure
(manifestCache st)
fullPush' oldmanifest st rmt refs
fullPush' :: Manifest -> State -> Remote -> [Ref] -> Annex (Bool, State)
fullPush' oldmanifest st rmt refs =do
let bs = map Git.Bundle.fullBundleSpec refs
bundlekey <- generateAndUploadGitBundle rmt bs oldmanifest
uploadManifest rmt (mkManifest [bundlekey] [])
ok <- allM (dropKey rmt) $
filter (/= bundlekey) (inManifest oldmanifest)
return (ok, st { manifestCache = Nothing })
guardPush :: State -> Annex (Bool, State) -> Annex (Bool, State)
guardPush st a = catchNonAsync a $ \ex -> do
liftIO $ hPutStrLn stderr $
"Push failed (" ++ show ex ++ ")"
return (False, st { manifestCache = Nothing })
-- Incremental push of only the refs that changed.
--
-- No refs were deleted (that causes a fullPush), but new refs may
-- have been added.
incrementalPush :: State -> Remote -> M.Map Ref Sha -> M.Map Ref Sha -> Annex (Bool, State)
incrementalPush st rmt oldtrackingrefs newtrackingrefs = guardPush st $ do
oldmanifest <- maybe (downloadManifestWhenPresent rmt) pure (manifestCache st)
if length (inManifest oldmanifest) + 1 > remoteAnnexMaxGitBundles (Remote.gitconfig rmt)
then fullPush' oldmanifest st rmt (M.keys newtrackingrefs)
else go oldmanifest
where
go oldmanifest = do
bs <- calc [] (M.toList newtrackingrefs)
bundlekey <- generateAndUploadGitBundle rmt bs oldmanifest
uploadManifest rmt (oldmanifest <> mkManifest [bundlekey] [])
return (True, st { manifestCache = Nothing })
calc c [] = return (reverse c)
calc c ((ref, sha):refs) = case M.lookup ref oldtrackingrefs of
Just oldsha
| oldsha == sha -> calc c refs -- unchanged
| otherwise ->
ifM (inRepo $ Git.Ref.isAncestor oldsha ref)
( use $ checkprereq oldsha ref
, use $ findotherprereq ref sha
)
Nothing -> use $ findotherprereq ref sha
where
use a = do
bs <- a
calc (bs:c) refs
-- Unfortunately, git bundle will let a prerequisite specified
-- for one ref prevent it including another ref. For example,
-- where x is a ref that points at A, and y is a ref that points at
-- B (which has A as its parent), git bundle x A..y
-- will omit including the x ref in the bundle at all.
--
-- But we need to include all (changed) refs that the user
-- specified to push in the bundle. So, only include the sha
-- as a prerequisite when it will not prevent including another
-- changed ref in the bundle.
checkprereq prereq ref =
ifM (anyM shadows $ M.elems $ M.delete ref changedrefs)
( pure $ Git.Bundle.fullBundleSpec ref
, pure $ Git.Bundle.BundleSpec
{ Git.Bundle.preRequisiteRef = Just prereq
, Git.Bundle.includeRef = ref
}
)
where
shadows s
| s == prereq = pure True
| otherwise = inRepo $ Git.Ref.isAncestor s prereq
changedrefs = M.differenceWith
(\a b -> if a == b then Nothing else Just a)
newtrackingrefs oldtrackingrefs
-- When the old tracking ref is not able to be used as a
-- prerequisite, this to find some other ref that was previously
-- pushed that can be used as a prerequisite instead. This can
-- optimise the bundle size a bit in edge cases.
--
-- For example, a forced push of branch foo that resets it back
-- several commits can use a previously pushed bar as a prerequisite
-- if it's an ancestor of foo.
findotherprereq ref sha =
findotherprereq' ref sha (M.elems oldtrackingrefs)
findotherprereq' ref _ [] = pure (Git.Bundle.fullBundleSpec ref)
findotherprereq' ref sha (l:ls)
| l == sha = findotherprereq' ref sha ls
| otherwise = ifM (inRepo $ Git.Ref.isAncestor l ref)
( checkprereq l ref
, findotherprereq' ref sha ls
)
-- When the push deletes all refs from the remote, upload an empty
-- manifest and then drop all bundles that were listed in the manifest.
-- The manifest is emptied first so if this is interrupted, only
-- unused bundles will remain in the remote, rather than leaving the
-- remote with a manifest that refers to missing bundles.
pushEmpty :: State -> Remote -> Annex (Bool, State)
pushEmpty st rmt = do
manifest <- maybe (downloadManifestWhenPresent rmt) pure
(manifestCache st)
uploadManifest rmt mempty
ok <- allM (dropKey rmt)
(genManifestKey (Remote.uuid rmt) : inManifest manifest)
return (ok, st { manifestCache = Nothing })
data RefSpec = RefSpec
{ forcedPush :: Bool
, srcRef :: Maybe Ref -- ^ Nothing when deleting a ref
, dstRef :: Ref
}
deriving (Show)
-- Any number of push commands can be sent by git, specifying the refspecs
-- to push. They should be followed by a blank line.
collectRefSpecs :: [String] -> ([RefSpec], [String])
collectRefSpecs = go []
where
go c (l:ls) = case splitLine l of
("push", refspec) -> go (parseRefSpec refspec:c) ls
("", _) -> (c, ls)
_ -> (c, (l:ls))
go c [] = (c, [])
parseRefSpec :: String -> RefSpec
parseRefSpec ('+':s) = (parseRefSpec s) { forcedPush = True }
parseRefSpec s =
let (src, cdst) = break (== ':') s
dst = if null cdst then cdst else drop 1 cdst
deletesrc = null src
in RefSpec
-- To delete a ref, have to do a force push of all
-- remaining refs.
{ forcedPush = deletesrc
, srcRef = if deletesrc
then Nothing
else Just (Ref (encodeBS src))
, dstRef = Ref (encodeBS dst)
}
-- "foo bar" to ("foo", "bar")
-- "foo" to ("foo", "")
splitLine :: String -> (String, String)
splitLine l =
let (c, sv) = break (== ' ') l
v = if null sv then sv else drop 1 sv
in (c, v)
data SpecialRemoteConfig
= SpecialRemoteConfig
{ specialRemoteUUID :: UUID
, specialRemoteConfig :: RemoteConfig
, specialRemoteName :: Maybe RemoteName
, specialRemoteUrl :: String
}
| ExistingSpecialRemote RemoteName
deriving (Show)
-- The url for a special remote looks like
-- "annex::uuid?param=value&param=value..."
--
-- Also accept an url of "annex::", when a remote name is provided,
-- to use an already enabled special remote.
parseSpecialRemoteNameUrl :: String -> String -> Either String SpecialRemoteConfig
parseSpecialRemoteNameUrl remotename url
| url == "annex::" && remotename /= url = Right $
ExistingSpecialRemote remotename
| "annex::" `isPrefixOf` remotename = parseSpecialRemoteUrl url Nothing
| otherwise = parseSpecialRemoteUrl url (Just remotename)
parseSpecialRemoteUrl :: String -> Maybe RemoteName -> Either String SpecialRemoteConfig
parseSpecialRemoteUrl url remotename = case parseURI url of
Nothing -> Left "URL parse failed"
Just u -> case uriScheme u of
"annex:" -> case uriPath u of
"" -> Left "annex: URL did not include a UUID"
(':':p)
| null p -> Left "annex: URL did not include a UUID"
| otherwise -> Right $ SpecialRemoteConfig
{ specialRemoteUUID = toUUID p
, specialRemoteConfig = parsequery u
, specialRemoteName = remotename
, specialRemoteUrl = url
}
_ -> Left "annex: URL malformed"
_ -> Left "Not an annex: URL"
where
parsequery u = M.fromList $
map parsekv $ splitc '&' (drop 1 (uriQuery u))
parsekv kv =
let (k, sv) = break (== '=') kv
v = if null sv then sv else drop 1 sv
in (Proposed (unEscapeString k), Proposed (unEscapeString v))
-- Runs an action with a Remote as specified by the SpecialRemoteConfig.
withSpecialRemote :: SpecialRemoteConfig -> StartAnnexBranch -> (Remote -> Annex a) -> Annex a
withSpecialRemote (ExistingSpecialRemote remotename) _ a =
getEnabledSpecialRemoteByName remotename >>=
maybe (giveup $ "There is no special remote named " ++ remotename)
a
withSpecialRemote cfg@(SpecialRemoteConfig {}) sab a = case specialRemoteName cfg of
-- The name could be the name of an existing special remote,
-- if so use it as long as its UUID matches the UUID from the url.
Just remotename -> getEnabledSpecialRemoteByName remotename >>= \case
Just rmt
| Remote.uuid rmt == specialRemoteUUID cfg -> a rmt
| otherwise -> giveup $ "The uuid in the annex:: url does not match the uuid of the remote named " ++ remotename
-- When cloning from an annex:: url,
-- this is used to set up the origin remote.
Nothing -> specialRemoteFromUrl sab
(initremote remotename >>= a)
Nothing -> specialRemoteFromUrl sab inittempremote
where
-- Initialize a new special remote with the provided configuration
-- and name.
initremote remotename = do
let c = M.insert SpecialRemote.nameField (Proposed remotename)
(specialRemoteConfig cfg)
t <- either giveup return (SpecialRemote.findType c)
dummycfg <- liftIO dummyRemoteGitConfig
(c', u) <- Remote.setup t Remote.Init (Just (specialRemoteUUID cfg))
Nothing c dummycfg
`onException` cleanupremote remotename
Logs.Remote.configSet u c'
setConfig (remoteConfig c' "url") (specialRemoteUrl cfg)
remotesChanged
getEnabledSpecialRemoteByName remotename >>= \case
Just rmt -> case checkSpecialRemoteProblems rmt of
Nothing -> return rmt
Just problem -> do
cleanupremote remotename
giveup problem
Nothing -> do
cleanupremote remotename
giveup "Unable to find special remote after setup."
-- Temporarily initialize a special remote, and remove it after
-- the action is run.
inittempremote =
let remotename = Git.Remote.makeLegalName $
"annex-temp-" ++ fromUUID (specialRemoteUUID cfg)
in bracket
(initremote remotename)
(const $ cleanupremote remotename)
a
cleanupremote remotename = do
l <- inRepo Git.Remote.listRemotes
when (remotename `elem` l) $
inRepo $ Git.Remote.Remove.remove remotename
-- When a special remote has already been enabled, just use it.
getEnabledSpecialRemoteByName :: RemoteName -> Annex (Maybe Remote)
getEnabledSpecialRemoteByName remotename =
Remote.byNameOnly remotename >>= \case
Nothing -> return Nothing
Just rmt ->
maybe (return (Just rmt)) giveup
(checkSpecialRemoteProblems rmt)
parseManifest :: B.ByteString -> Either String Manifest
parseManifest b =
let (outks, inks) = partitionEithers $ map parseline $ B8.lines b
in case (checkvalid [] inks, checkvalid [] outks) of
(Right inks', Right outks') ->
Right $ mkManifest inks' outks'
(Left err, _) -> Left err
(_, Left err) -> Left err
where
parseline l
| "-" `B.isPrefixOf` l =
Left $ deserializeKey' $ B.drop 1 l
| otherwise =
Right $ deserializeKey' l
checkvalid c [] = Right (reverse c)
checkvalid c (Just k:ks) = case fromKey keyVariety k of
GitBundleKey -> checkvalid (k:c) ks
_ -> Left $ "Wrong type of key in manifest " ++ serializeKey k
checkvalid _ (Nothing:_) =
Left "Error parsing manifest"
-- Avoid using special remotes that are thirdparty populated, because
-- there is no way to push the git repository keys into one.
--
-- XXX Avoid using special remotes that are encrypted by key
-- material stored in the git repository, since that would present a
-- chicken and egg problem when cloning.
checkSpecialRemoteProblems :: Remote -> Maybe String
checkSpecialRemoteProblems rmt
| Remote.thirdPartyPopulated (Remote.remotetype rmt) =
Just $ "Cannot use this thirdparty-populated special"
++ " remote as a git remote."
| importTree (Remote.config rmt) =
Just $ "Using importtree=yes special remotes as git remotes"
++ " is not yet supported."
| parseEncryptionMethod (unparsedRemoteConfig (Remote.config rmt)) /= Right NoneEncryption
&& not (remoteAnnexAllowEncryptedGitRepo (Remote.gitconfig rmt)) =
Just $ "Using an encrypted special remote as a git"
++ " remote makes it impossible to clone"
++ " from it. If you will never need to"
++ " clone from this remote, set: git config "
++ decodeBS allowencryptedgitrepo ++ " true"
| otherwise = Nothing
where
ConfigKey allowencryptedgitrepo = remoteAnnexConfig rmt "allow-encrypted-gitrepo"
-- Downloads the Manifest when present in the remote. When not present,
-- returns an empty Manifest.
downloadManifestWhenPresent :: Remote -> Annex Manifest
downloadManifestWhenPresent rmt = fromMaybe mempty <$> downloadManifest rmt
-- Downloads the Manifest, or fails if the remote does not contain it.
downloadManifestOrFail :: Remote -> Annex Manifest
downloadManifestOrFail rmt =
maybe (giveup "No git repository found in this remote.") return
=<< downloadManifest rmt
-- Downloads the Manifest or Nothing if the remote does not contain a
-- manifest.
--
-- Throws errors if the remote cannot be accessed or the download fails,
-- or if the manifest file cannot be parsed.
downloadManifest :: Remote -> Annex (Maybe Manifest)
downloadManifest rmt = getKeyExportLocations rmt mk >>= \case
Nothing -> ifM (Remote.checkPresent rmt mk)
( gettotmp $ \tmp ->
Remote.retrieveKeyFile rmt mk
(AssociatedFile Nothing) tmp
nullMeterUpdate Remote.NoVerify
, return Nothing
)
Just locs -> getexport locs
where
mk = genManifestKey (Remote.uuid rmt)
-- Downloads to a temporary file, rather than using eg
-- Annex.Transfer.download that would put it in the object
-- directory. The content of manifests is not stable, and so
-- it needs to re-download it fresh every time, and the object
-- file should not be stored locally.
gettotmp dl = withTmpFile "GITMANIFEST" $ \tmp tmph -> do
liftIO $ hClose tmph
_ <- dl tmp
b <- liftIO (B.readFile tmp)
case parseManifest b of
Right m -> return (Just m)
Left err -> giveup err
getexport [] = return Nothing
getexport (loc:locs) =
ifM (Remote.checkPresentExport (Remote.exportActions rmt) mk loc)
( gettotmp $ \tmp ->
Remote.retrieveExport (Remote.exportActions rmt)
mk loc tmp nullMeterUpdate
, getexport locs
)
-- Uploads the Manifest to the remote.
--
-- Throws errors if the remote cannot be accessed or the upload fails.
--
-- The manifest key is first dropped from the remote, then the new
-- content is uploaded. This is necessary because the same key is used,
-- and behavior of remotes is undefined when sending a key that is
-- already present on the remote, but with different content.
--
-- Note that if this is interrupted or loses access to the remote part
-- way through, it may leave the remote without a manifest file. That will
-- appear as if all refs have been deleted from the remote.
-- XXX It should be possible to remember when that happened, by writing
-- state to a file before, and then the next time git-remote-annex is run, it
-- could recover from the situation.
uploadManifest :: Remote -> Manifest -> Annex ()
uploadManifest rmt manifest =
withTmpFile "GITMANIFEST" $ \tmp tmph -> do
liftIO $ forM_ (inManifest manifest) $ \bundlekey ->
B8.hPutStrLn tmph (serializeKey' bundlekey)
liftIO $ hClose tmph
-- Remove old manifest if present.
dropKey' rmt mk
-- storeKey needs the key to be in the annex objects
-- directory, so put the manifest file there temporarily.
-- Using linkOrCopy rather than moveAnnex to avoid updating
-- InodeCache database. Also, works even when the repository
-- is configured to require only cryptographically secure
-- keys, which it is not.
objfile <- calcRepo (gitAnnexLocation mk)
res <- modifyContentDir objfile $
linkOrCopy mk (toRawFilePath tmp) objfile Nothing
unless (isJust res)
uploadfailed
ok <- (uploadGitObject rmt mk >> pure True)
`catchNonAsync` (const (pure False))
-- Don't leave the manifest key in the annex objects
-- directory.
unlinkAnnex mk
unless ok
uploadfailed
where
mk = genManifestKey (Remote.uuid rmt)
uploadfailed = giveup $ "Failed to upload " ++ serializeKey mk
-- Downloads a git bundle to the annex objects directory, unless
-- the object file is already present. Returns the filename of the object
-- file.
--
-- Throws errors if the download fails, or the checksum does not verify.
--
-- This does not update the location log to indicate that the local
-- repository contains the git bundle object. Reasons not to include:
-- 1. When this is being used in a git clone, the repository will not have
-- a UUID yet.
-- 2. It would unncessarily bloat the git-annex branch, which would then
-- lead to more things needing to be pushed to the special remote,
-- and so more things pulled from it, etc.
-- 3. Git bundle objects are not usually transferred between repositories
-- except special remotes (although the user can if they want to).
downloadGitBundle :: Remote -> Key -> Annex FilePath
downloadGitBundle rmt k = getKeyExportLocations rmt k >>= \case
Nothing -> dlwith $
download rmt k (AssociatedFile Nothing) stdRetry noNotification
Just locs -> dlwith $
anyM getexport locs
where
dlwith a = ifM a
( decodeBS <$> calcRepo (gitAnnexLocation k)
, giveup $ "Failed to download " ++ serializeKey k
)
getexport loc = catchNonAsync (getexport' loc) (const (pure False))
getexport' loc =
getViaTmp rsp vc k (AssociatedFile Nothing) Nothing $ \tmp -> do
v <- Remote.retrieveExport (Remote.exportActions rmt)
k loc (decodeBS tmp) nullMeterUpdate
return (True, v)
rsp = Remote.retrievalSecurityPolicy rmt
vc = Remote.RemoteVerify rmt
-- Uploads a bundle or manifest object from the annex objects directory
-- to the remote.
--
-- Throws errors if the upload fails.
--
-- This does not update the location log to indicate that the remote
-- contains the git object.
uploadGitObject :: Remote -> Key -> Annex ()
uploadGitObject rmt k = getKeyExportLocations rmt k >>= \case
Just (loc:_) -> do
objfile <- fromRawFilePath <$> calcRepo (gitAnnexLocation k)
Remote.storeExport (Remote.exportActions rmt) objfile k loc nullMeterUpdate
_ ->
unlessM (upload rmt k (AssociatedFile Nothing) retry noNotification) $
giveup $ "Failed to upload " ++ serializeKey k
where
retry = case fromKey keyVariety k of
GitBundleKey -> stdRetry
-- Manifest keys are not stable
_ -> noRetry
-- Generates a git bundle, ingests it into the local objects directory,
-- and uploads its key to the special remote.
--
-- If the key is already present in the provided manifest, avoids
-- uploading it.
--
-- On failure, an exception is thrown, and nothing is added to the local
-- objects directory.
generateAndUploadGitBundle
:: Remote
-> [Git.Bundle.BundleSpec]
-> Manifest
-> Annex Key
generateAndUploadGitBundle rmt bs manifest =
withTmpFile "GITBUNDLE" $ \tmp tmph -> do
liftIO $ hClose tmph
inRepo $ Git.Bundle.create tmp bs
bundlekey <- genGitBundleKey (Remote.uuid rmt)
(toRawFilePath tmp) nullMeterUpdate
unless (bundlekey `elem` (inManifest manifest)) $ do
unlessM (moveAnnex bundlekey (AssociatedFile Nothing) (toRawFilePath tmp)) $
giveup "Unable to push"
uploadGitObject rmt bundlekey
`onException` unlinkAnnex bundlekey
return bundlekey
dropKey :: Remote -> Key -> Annex Bool
dropKey rmt k = tryNonAsync (dropKey' rmt k) >>= \case
Right () -> return True
Left ex -> do
liftIO $ hPutStrLn stderr $
"Failed to drop "
++ serializeKey k
++ " (" ++ show ex ++ ")"
return False
dropKey' :: Remote -> Key -> Annex ()
dropKey' rmt k = getKeyExportLocations rmt k >>= \case
Nothing -> Remote.removeKey rmt k
Just locs -> forM_ locs $ \loc ->
Remote.removeExport (Remote.exportActions rmt) k loc
getKeyExportLocations :: Remote -> Key -> Annex (Maybe [ExportLocation])
getKeyExportLocations rmt k = do
cfg <- Annex.getGitConfig
u <- getUUID
return $ keyExportLocations rmt k cfg u
-- When the remote contains a tree, the git keys are stored
-- inside the .git/annex/objects/ directory in the remote.
--
-- The first ExportLocation in the returned list is the one that
-- is the same as the local repository would use. But it's possible
-- that one of the others in the list was used by another repository to
-- upload a git key.
keyExportLocations :: Remote -> Key -> GitConfig -> UUID -> Maybe [ExportLocation]
keyExportLocations rmt k cfg uuid
| exportTree (Remote.config rmt) || importTree (Remote.config rmt) =
Just $ map (\p -> mkExportLocation (".git" P.</> p)) $
concatMap (`annexLocationsNonBare` k) cfgs
| otherwise = Nothing
where
-- When git-annex has not been initialized yet (eg, when cloning),
-- the Differences are unknown, so make a version of the GitConfig
-- with and without the OneLevelObjectHash difference.
cfgs
| uuid /= NoUUID = [cfg]
| hasDifference OneLevelObjectHash (annexDifferences cfg) =
[ cfg
, cfg { annexDifferences = mempty }
]
| otherwise =
[ cfg
, cfg
{ annexDifferences = mkDifferences
(S.singleton OneLevelObjectHash)
}
]
-- Tracking refs are used to remember the refs that are currently on the
-- remote. This is different from git's remote tracking branches, since it
-- needs to track all refs on the remote, not only the refs that the user
-- chooses to fetch.
--
-- For refs/heads/master, the tracking ref is
-- refs/namespaces/git-remote-annex/uuid/refs/heads/master,
-- using the uuid of the remote. See gitnamespaces(7).
trackingRefPrefix :: Remote -> B.ByteString
trackingRefPrefix rmt = "refs/namespaces/git-remote-annex/"
<> fromUUID (Remote.uuid rmt) <> "/"
toTrackingRef :: Remote -> Ref -> Ref
toTrackingRef rmt (Ref r) = Ref $ trackingRefPrefix rmt <> r
-- If the ref is not a tracking ref, it is returned as-is.
fromTrackingRef :: Remote -> Ref -> Ref
fromTrackingRef rmt = Git.Ref.removeBase (decodeBS (trackingRefPrefix rmt))
-- Update the tracking refs to be those in the map.
-- When deleteold is set, any other tracking refs are deleted.
updateTrackingRefs :: Bool -> Remote -> M.Map Ref Sha -> Annex ()
updateTrackingRefs deleteold rmt new = do
old <- inRepo $ Git.Ref.forEachRef
[Param (decodeBS (trackingRefPrefix rmt))]
-- Delete all tracking refs that are not in the map.
when deleteold $
forM_ (filter (\p -> M.notMember (fst p) new) old) $ \(s, r) ->
inRepo $ Git.Ref.delete s r
-- Update all changed tracking refs.
let oldmap = M.fromList (map (\(s, r) -> (r, s)) old)
forM_ (M.toList new) $ \(r, s) ->
case M.lookup r oldmap of
Just s' | s' == s -> noop
_ -> inRepo $ Git.Branch.update' r s
-- git clone does not bother to set GIT_WORK_TREE when running this
-- program, and it does not run it inside the new git repo either.
-- GIT_DIR is set to the new git directory. So, have to override
-- the worktree to be the parent of the gitdir.
getRepo :: IO Repo
getRepo = getEnv "GIT_WORK_TREE" >>= \case
Just _ -> Git.CurrentRepo.get
Nothing -> fixup <$> Git.CurrentRepo.get
where
fixup r@(Repo { location = loc@(Local { worktree = Just _ }) }) =
r { location = loc { worktree = Just (P.takeDirectory (gitdir loc)) } }
fixup r = r
-- Records what the git-annex branch was at the beginning of this command.
data StartAnnexBranch
= AnnexBranchExistedAlready Sha
| AnnexBranchCreatedEmpty Sha
{- Run early in the command, gets the initial state of the git-annex
- branch.
-
- If the branch does not exist yet, it's created here. This is done
- because it's hard to avoid the branch being created by this command,
- so tracking the sha of the created branch allows cleaning it up later.
-}
startAnnexBranch :: Annex StartAnnexBranch
startAnnexBranch = ifM (null <$> Annex.Branch.siblingBranches)
( AnnexBranchCreatedEmpty <$> Annex.Branch.getBranch
, AnnexBranchExistedAlready <$> Annex.Branch.getBranch
)
-- This runs an action that will set up a special remote that
-- was specified using an annex url.
--
-- Setting up a special remote needs to write its config to the git-annex
-- branch. And using a special remote may also write to the branch.
-- But in this case, writes to the git-annex branch need to be avoided,
-- so that cleanupInitialization can leave things in the right state.
--
-- So this prevents commits to the git-annex branch, and redirects all
-- journal writes to a temporary directory, so that all writes
-- to the git-annex branch by the action will be discarded.
specialRemoteFromUrl :: StartAnnexBranch -> Annex a -> Annex a
specialRemoteFromUrl sab a = withTmpDir "journal" $ \tmpdir -> do
Annex.overrideGitConfig $ \c ->
c { annexAlwaysCommit = False }
Annex.BranchState.changeState $ \st ->
st { alternateJournal = Just (toRawFilePath tmpdir) }
a `finally` cleanupInitialization sab
-- If the git-annex branch did not exist when this command started,
-- it was created empty by this command, and this command has avoided
-- making any other commits to it. If nothing else has written to the
-- branch while this command was running, the branch will be deleted.
-- That allows for the git-annex branch that is fetched from the special
-- remote to contain Differences, which would prevent it from being merged
-- with the git-annex branch created by this command.
--
-- If there is still not a sibling git-annex branch, this deletes all annex
-- objects for git bundles from the annex objects directory, and deletes
-- the annex objects directory. That is necessary to avoid the
-- Annex.Init.objectDirNotPresent check preventing a later initialization.
-- And if the later initialization includes Differences, the git bundle
-- objects downloaded by this process would be in the wrong locations.
--
-- When there is now a sibling git-annex branch, this handles
-- initialization. When the initialized git-annex branch has Differences,
-- the git bundle objects are in the wrong place, so have to be deleted.
cleanupInitialization :: StartAnnexBranch -> Annex ()
cleanupInitialization sab = do
case sab of
AnnexBranchExistedAlready _ -> noop
AnnexBranchCreatedEmpty r ->
whenM ((r ==) <$> Annex.Branch.getBranch) $ do
inRepo $ Git.Branch.delete Annex.Branch.fullname
indexfile <- fromRepo gitAnnexIndex
liftIO $ removeWhenExistsWith R.removeLink indexfile
ifM Annex.Branch.hasSibling
( do
autoInitialize' (pure True) remoteList
differences <- allDifferences <$> recordedDifferences
when (differences /= mempty) $
deletebundleobjects
, deletebundleobjects
)
where
deletebundleobjects = do
annexobjectdir <- fromRepo gitAnnexObjectDir
ks <- listKeys InAnnex
forM_ ks $ \k -> case fromKey keyVariety k of
GitBundleKey -> lockContentForRemoval k noop removeAnnex
_ -> noop
void $ liftIO $ tryIO $ removeDirectory (decodeBS annexobjectdir)

View file

@ -25,7 +25,7 @@ run (_remotename:address:[]) = forever $
"capabilities" -> putStrLn "connect" >> ready
"connect git-upload-pack" -> go UploadPack
"connect git-receive-pack" -> go ReceivePack
l -> giveup $ "git-remote-helpers protocol error at " ++ show l
l -> giveup $ "gitremote-helpers protocol error at " ++ show l
where
(onionaddress, onionport)
| '/' `elem` address = parseAddressPort $

View file

@ -38,6 +38,7 @@ import Utility.CopyFile
import Git.FilePath
import Utility.PID
import Utility.InodeCache
import Utility.Metered
import Annex.InodeSentinal
import qualified Database.Keys
import qualified Database.Fsck as FsckDb
@ -206,8 +207,7 @@ performRemote key afile numcopies remote =
)
, return Nothing
)
getfile' tmp = Remote.retrieveKeyFile remote key (AssociatedFile Nothing) (fromRawFilePath tmp) dummymeter (RemoteVerify remote)
dummymeter _ = noop
getfile' tmp = Remote.retrieveKeyFile remote key (AssociatedFile Nothing) (fromRawFilePath tmp) nullMeterUpdate (RemoteVerify remote)
getcheap tmp = case Remote.retrieveKeyFileCheap remote of
Just a -> isRight <$> tryNonAsync (a key afile (fromRawFilePath tmp))
Nothing -> return False

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2010-2016 Joey Hess <id@joeyh.name>
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -34,6 +34,7 @@ import Logs.View (is_branchView)
import Annex.BloomFilter
import qualified Database.Keys
import Annex.InodeSentinal
import Backend.GitRemoteAnnex (isGitRemoteAnnexKey)
import qualified Data.Map as M
import qualified Data.ByteString as S
@ -104,7 +105,8 @@ checkRemoteUnused remotename refspec = go =<< Remote.nameToUUID remotename
_ <- check "" (remoteUnusedMsg r remotename) (remoteunused u) 0
next $ return True
remoteunused u = loggedKeysFor u >>= \case
Just ks -> excludeReferenced refspec ks
Just ks -> filter (not . isGitRemoteAnnexKey u)
<$> excludeReferenced refspec ks
Nothing -> giveup "This repository is read-only."
check :: String -> ([(Int, Key)] -> String) -> Annex [Key] -> Int -> Annex Int

68
Git/Bundle.hs Normal file
View file

@ -0,0 +1,68 @@
{- git bundles
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Git.Bundle where
import Common
import Git
import Git.Command
import Data.Char (ord)
import qualified Data.ByteString.Char8 as S8
listHeads :: FilePath -> Repo -> IO [(Sha, Ref)]
listHeads bundle repo = map gen . S8.lines <$>
pipeReadStrict [Param "bundle", Param "list-heads", File bundle] repo
where
gen l = let (s, r) = separate' (== fromIntegral (ord ' ')) l
in (Ref s, Ref r)
unbundle :: FilePath -> Repo -> IO ()
unbundle bundle = runQuiet [Param "bundle", Param "unbundle", File bundle]
-- Specifies what to include in the bundle.
data BundleSpec = BundleSpec
{ preRequisiteRef :: Maybe Ref
-- ^ Do not include this Ref, or any objects reachable from it
-- in the bundle. This should be an ancestor of the includeRef.
, includeRef :: Ref
-- ^ Include this Ref and objects reachable from it in the bundle,
-- unless filtered out by the preRequisiteRef of this BundleSpec
-- or any other one that is included in the bundle.
}
deriving (Show)
-- Include the ref and all objects reachable from it in the bundle.
-- (Unless another BundleSpec is included that has a preRequisiteRef
-- that filters out the ref or other objects.)
fullBundleSpec :: Ref -> BundleSpec
fullBundleSpec r = BundleSpec
{ preRequisiteRef = Nothing
, includeRef = r
}
create :: FilePath -> [BundleSpec] -> Repo -> IO ()
create bundle revs repo = pipeWrite
[ Param "bundle"
, Param "create"
, Param "--quiet"
, File bundle
, Param "--stdin"
] repo writer
where
writer h = do
forM_ revs $ \bs ->
case preRequisiteRef bs of
Nothing -> S8.hPutStrLn h $
fromRef' (includeRef bs)
Just pr -> S8.hPutStrLn h $
fromRef' pr
<> ".." <>
fromRef' (includeRef bs)
hClose h

View file

@ -1,6 +1,6 @@
{- git ref stuff
-
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -165,8 +165,18 @@ matchingUniq refs repo = nubBy uniqref <$> matching refs repo
list :: Repo -> IO [(Sha, Ref)]
list = matching' [] []
{- Deletes a ref. This can delete refs that are not branches,
- which git branch --delete refuses to delete. -}
{- Lists refs using for-each-ref. -}
forEachRef :: [CommandParam] -> Repo -> IO [(Sha, Branch)]
forEachRef ps repo = map gen . S8.lines <$>
pipeReadStrict (Param "for-each-ref" : ps ++ [format]) repo
where
format = Param "--format=%(objectname) %(refname)"
gen l = let (r, b) = separate' (== fromIntegral (ord ' ')) l
in (Ref r, Ref b)
{- Deletes a ref when it contains the specified sha.
- This can delete refs that are not branches, which
- git branch --delete refuses to delete. -}
delete :: Sha -> Ref -> Repo -> IO ()
delete oldvalue ref = run
[ Param "update-ref"
@ -175,6 +185,14 @@ delete oldvalue ref = run
, Param $ fromRef oldvalue
]
{- Deletes a ref no matter what it contains. -}
delete' :: Ref -> Repo -> IO ()
delete' ref = run
[ Param "update-ref"
, Param "-d"
, Param $ fromRef ref
]
{- Gets the sha of the tree a ref uses.
-
- The ref may be something like a branch name, and it could contain
@ -192,6 +210,19 @@ tree (Ref ref) = extractSha <$$> pipeReadStrict
-- de-reference commit objects to the tree
else ref <> ":"
{- Check if the first ref is an ancestor of the second ref.
-
- Note that if the two refs point to the same commit, it is considered
- to be an ancestor of itself.
-}
isAncestor :: Ref -> Ref -> Repo -> IO Bool
isAncestor r1 r2 = runBool
[ Param "merge-base"
, Param "--is-ancestor"
, Param (fromRef r1)
, Param (fromRef r2)
]
{- Checks if a String is a legal git ref name.
-
- The rules for this are complex; see git-check-ref-format(1) -}

View file

@ -1,6 +1,6 @@
{- git remote stuff
-
- Copyright 2012-2021 Joey Hess <id@joeyh.name>
- Copyright 2012-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -13,6 +13,7 @@ module Git.Remote where
import Common
import Git
import Git.Types
import Git.Command
import Data.Char
import qualified Data.Map as M
@ -23,6 +24,11 @@ import Network.URI
import Git.FilePath
#endif
{- Lists all currently existing git remotes. -}
listRemotes :: Repo -> IO [RemoteName]
listRemotes repo = map decodeBS . S8.lines
<$> pipeReadStrict [Param "remote"] repo
{- Is a git config key one that specifies the url of a remote? -}
isRemoteUrlKey :: ConfigKey -> Bool
isRemoteUrlKey = isRemoteKey "url"

View file

@ -1,4 +1,4 @@
all=git-annex git-annex-shell mans docs
all=git-annex git-annex-shell git-remote-annex mans docs
# set to "./Setup" if you lack a cabal program. Or can be set to "stack"
BUILDER?=cabal
@ -70,6 +70,9 @@ git-annex: tmp/configure-stamp
git-annex-shell: git-annex
ln -sf git-annex git-annex-shell
git-remote-annex: git-annex
ln -sf git-annex git-remote-annex
# These are not built normally.
git-union-merge.1: doc/git-union-merge.mdwn
./Build/mdwn2man git-union-merge 1 doc/git-union-merge.mdwn > git-union-merge.1
@ -90,6 +93,7 @@ install-bins: build
install -d $(DESTDIR)$(PREFIX)/bin
install git-annex $(DESTDIR)$(PREFIX)/bin
ln -sf git-annex $(DESTDIR)$(PREFIX)/bin/git-annex-shell
ln -sf git-annex $(DESTDIR)$(PREFIX)/bin/git-remote-annex
ln -sf git-annex $(DESTDIR)$(PREFIX)/bin/git-remote-tor-annex
install-desktop: build Build/InstallDesktopFile
@ -141,7 +145,7 @@ clean:
doc/.ikiwiki html dist tags Build/SysConfig Build/Version \
Setup Build/InstallDesktopFile Build/Standalone \
Build/DistributionUpdate Build/BuildVersion Build/MakeMans \
git-annex-shell git-union-merge .tasty-rerun-log
git-annex-shell git-remote-annex git-union-merge .tasty-rerun-log
find . -name \*.o -exec rm {} \;
find . -name \*.hi -exec rm {} \;

View file

@ -92,7 +92,7 @@ list :: Bool -> Annex [Git.Repo]
list autoinit = do
c <- fromRepo Git.config
rs <- mapM (tweakurl c) =<< Annex.getGitRemotes
mapM (configRead autoinit) rs
mapM (configRead autoinit) (filter (not . isGitRemoteAnnex) rs)
where
annexurl r = remoteConfig r "annexurl"
tweakurl c r = do
@ -103,6 +103,9 @@ list autoinit = do
Git.Construct.remoteNamed n $
Git.Construct.fromRemoteLocation (Git.fromConfigValue url) False g
isGitRemoteAnnex :: Git.Repo -> Bool
isGitRemoteAnnex r = "annex::" `isPrefixOf` Git.repoLocation r
{- Git remotes are normally set up using standard git commands, not
- git-annex initremote and enableremote.
-

View file

@ -14,6 +14,7 @@ module Remote.Helper.Encryptable (
encryptionAlreadySetup,
encryptionConfigParsers,
parseEncryptionConfig,
parseEncryptionMethod,
remoteCipher,
remoteCipher',
embedCreds,
@ -85,7 +86,7 @@ encryptionFieldParser :: RemoteConfigFieldParser
encryptionFieldParser = RemoteConfigFieldParser
{ parserForField = encryptionField
, valueParser = \v c -> Just . RemoteConfigValue
<$> parseEncryptionMethod (fmap fromProposedAccepted v) c
<$> parseEncryptionMethod' v c
, fieldDesc = FieldDesc "how to encrypt data stored in the special remote"
, valueDesc = Just $ ValueDesc $
intercalate " or " (M.keys encryptionMethods)
@ -100,14 +101,18 @@ encryptionMethods = M.fromList
, ("sharedpubkey", SharedPubKeyEncryption)
]
parseEncryptionMethod :: Maybe String -> RemoteConfig -> Either String EncryptionMethod
parseEncryptionMethod (Just s) _ = case M.lookup s encryptionMethods of
Just em -> Right em
Nothing -> Left badEncryptionMethod
parseEncryptionMethod :: RemoteConfig -> Either String EncryptionMethod
parseEncryptionMethod c = parseEncryptionMethod' (M.lookup encryptionField c) c
parseEncryptionMethod' :: Maybe (ProposedAccepted String) -> RemoteConfig -> Either String EncryptionMethod
parseEncryptionMethod' (Just s) _ =
case M.lookup (fromProposedAccepted s) encryptionMethods of
Just em -> Right em
Nothing -> Left badEncryptionMethod
-- Hybrid encryption is the default when a keyid is specified without
-- an encryption field, or when there's a cipher already but no encryption
-- field.
parseEncryptionMethod Nothing c
parseEncryptionMethod' Nothing c
| M.member (Accepted "keyid") c || M.member cipherField c = Right HybridEncryption
| otherwise = Left badEncryptionMethod
@ -162,7 +167,7 @@ encryptionSetup c gc = do
maybe (genCipher pc gpgcmd) (updateCipher pc gpgcmd) (extractCipher pc)
where
-- The type of encryption
encryption = parseEncryptionMethod (fromProposedAccepted <$> M.lookup encryptionField c) c
encryption = parseEncryptionMethod c
-- Generate a new cipher, depending on the chosen encryption scheme
genCipher pc gpgcmd = case encryption of
Right NoneEncryption -> return (c, NoEncryption)

View file

@ -36,7 +36,10 @@ data BranchState = BranchState
-- process need to be noticed while the current process is running?
-- (This makes the journal always be read, and avoids using the
-- cache.)
, alternateJournal :: Maybe RawFilePath
-- ^ use this directory for all journals, rather than the
-- gitAnnexJournalDir and gitAnnexPrivateJournalDir.
}
startBranchState :: BranchState
startBranchState = BranchState False False False [] [] [] False
startBranchState = BranchState False False False [] [] [] False Nothing

View file

@ -17,6 +17,7 @@ module Types.Difference (
differenceConfigVal,
hasDifference,
listDifferences,
mkDifferences,
) where
import Utility.PartialPrelude

View file

@ -373,6 +373,8 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexBwLimitDownload :: Maybe BwRate
, remoteAnnexAllowUnverifiedDownloads :: Bool
, remoteAnnexConfigUUID :: Maybe UUID
, remoteAnnexMaxGitBundles :: Int
, remoteAnnexAllowEncryptedGitRepo :: Bool
{- These settings are specific to particular types of remotes
- including special remotes. -}
@ -430,7 +432,8 @@ extractRemoteGitConfig r remotename = do
, remoteAnnexTrustLevel = notempty $ getmaybe "trustlevel"
, remoteAnnexStartCommand = notempty $ getmaybe "start-command"
, remoteAnnexStopCommand = notempty $ getmaybe "stop-command"
, remoteAnnexSpeculatePresent = getbool "speculate-present" False
, remoteAnnexSpeculatePresent =
getbool "speculate-present" False
, remoteAnnexBare = getmaybebool "bare"
, remoteAnnexRetry = getmayberead "retry"
, remoteAnnexForwardRetry = getmayberead "forward-retry"
@ -476,6 +479,10 @@ extractRemoteGitConfig r remotename = do
, remoteAnnexDdarRepo = getmaybe "ddarrepo"
, remoteAnnexHookType = notempty $ getmaybe "hooktype"
, remoteAnnexExternalType = notempty $ getmaybe "externaltype"
, remoteAnnexMaxGitBundles =
fromMaybe 100 (getmayberead "max-git-bundles")
, remoteAnnexAllowEncryptedGitRepo =
getbool "allow-encrypted-gitrepo" False
}
where
getbool k d = fromMaybe d $ getmaybebool k

44
Types/GitRemoteAnnex.hs Normal file
View file

@ -0,0 +1,44 @@
{- git-remote-annex types
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Types.GitRemoteAnnex
( Manifest
, mkManifest
, inManifest
, outManifest
) where
import Types.Key
import qualified Data.Semigroup as Sem
-- The manifest contains an ordered list of git bundle keys.
--
-- There is a second list of git bundle keys that are no longer
-- used and should be deleted. This list should never contain keys
-- that are in the first list.
data Manifest =
Manifest
{ inManifest :: [Key]
, outManifest :: [Key]
}
deriving (Show)
-- Smart constructor for Manifest. Preserves outManifest invariant.
mkManifest
:: [Key] -- ^ inManifest
-> [Key] -- ^ outManifest
-> Manifest
mkManifest inks outks = Manifest inks (filter (`notElem` inks) outks)
instance Monoid Manifest where
mempty = Manifest [] []
instance Sem.Semigroup Manifest where
a <> b = mkManifest
(inManifest a <> inManifest b)
(outManifest a <> outManifest b)

View file

@ -219,6 +219,8 @@ data KeyVariety
| WORMKey
| URLKey
| VURLKey
| GitBundleKey
| GitManifestKey
-- A key that is handled by some external backend.
| ExternalKey S.ByteString HasExt
-- Some repositories may contain keys of other varieties,
@ -253,6 +255,8 @@ hasExt (MD5Key (HasExt b)) = b
hasExt WORMKey = False
hasExt URLKey = False
hasExt VURLKey = False
hasExt GitBundleKey = False
hasExt GitManifestKey = False
hasExt (ExternalKey _ (HasExt b)) = b
hasExt (OtherKey s) = (snd <$> S8.unsnoc s) == Just 'E'
@ -282,6 +286,8 @@ formatKeyVariety v = case v of
WORMKey -> "WORM"
URLKey -> "URL"
VURLKey -> "VURL"
GitBundleKey -> "GITBUNDLE"
GitManifestKey -> "GITMANIFEST"
ExternalKey s e -> adde e ("X" <> s)
OtherKey s -> s
where
@ -347,6 +353,7 @@ parseKeyVariety "MD5E" = MD5Key (HasExt True)
parseKeyVariety "WORM" = WORMKey
parseKeyVariety "URL" = URLKey
parseKeyVariety "VURL" = VURLKey
parseKeyVariety "GITBUNDLE" = GitBundleKey
parseKeyVariety b
| "X" `S.isPrefixOf` b =
let b' = S.tail b

View file

@ -79,10 +79,6 @@ content of an annexed file remains unchanged.
passing it to a shell script. These types of keys are distinct from URLs/URIs
that may be attached to a key (using any backend) indicating the key's location
on the web or in one of [[special_remotes]].
* `GIT` -- This is used internally by git-annex when exporting trees
containing files stored in git, rather than git-annex. It represents a
git sha. This is never used for git-annex links, but information about
keys of this type is stored in the git-annex branch.
## external backends
@ -100,6 +96,19 @@ Like with git-annex's builtin backends, you can add "E" to the end of the
name of an external backend, to get a version that includes the file
extension in the key.
## internal use backends
Keys using these backends can sometimes be visible, but they are used by
git-annex for its own purposes, and not for your annexed files.
* `GIT` -- This is used internally by git-annex when exporting trees
containing files stored in git, rather than git-annex. It represents a
git sha. This is never used for git-annex links, but information about
keys of this type is stored in the git-annex branch.
* `GITBUNDLE` and `GITMANIFEST` -- Used by [[git-remote-annex]] to store
a git repository in a special remote. See
[[this_page|internals/git-remote-annex]] for details about these.
## notes
If you want to be able to prove that you're working with the same file

View file

@ -49,5 +49,5 @@ problem:
[[fairly simple shell script using standard tools|tips/Decrypting_files_in_special_remotes_without_git-annex]]
(gpg and openssl) can decrypt files stored on such
a remote, as long as you have access to the encryption keys (which
are stored in the git-annex branch of the repository, sometimes
encrypted with your gpg key).
for some types of encryption are stored in the git-annex branch of
the repository, sometimes encrypted with your gpg key).

View file

@ -1634,10 +1634,6 @@ Remotes are configured using these settings in `.git/config`.
configured by the trust and untrust commands. The value can be any of
"trusted", "semitrusted" or "untrusted".
* `remote.<name>.annex-availability`
This configuration setting is no longer used.
* `remote.<name>.annex-speculate-present`
Set to "true" to make git-annex speculate that this remote may contain the
@ -1652,11 +1648,36 @@ Remotes are configured using these settings in `.git/config`.
remotes, and is set when using [[git-annex-initremote]](1) with the
`--private` option.
* `remote.<name>.annex-max-git-bundles`, `annex.max-git-bundles`
When using [[git-remote-annex]] to store a git repository in a special
remote, this configures how many separate git bundle objects to store
in the special remote before re-uploading a single git bundle that contains
the entire git repository.
The default is 100, which aims to avoid often needing to often re-upload,
while preventing a new clone needing to download too many objects. Set to
0 to disable re-uploading.
* `remote.<name>.annex-allow-encrypted-gitrepo`
Setting this to true allows using [[git-remote-annex]] to push the git
repository to an encrypted special remote.
That is not allowed by default, because it is impossible to git clone
from an encrypted special remote, since it needs encryption keys stored
in the remote. So take care that, if you set this, you don't rely
on the encrypted special remote being the only copy of your git repository.
* `remote.<name>.annex-bare`
Can be used to tell git-annex if a remote is a bare repository
or not. Normally, git-annex determines this automatically.
* `remote.<name>.annex-availability`
This configuration setting is no longer used.
* `remote.<name>.annex-ssh-options`
Options to use when using ssh to talk to this remote.
@ -2194,6 +2215,10 @@ More git-annex documentation is available on its web site,
If git-annex is installed from a package, a copy of its documentation
should be included, in, for example, `/usr/share/doc/git-annex/`.
* [[git-annex-shell]](1)
* [[git-remote-annex]](1)
* [[git-remote-tor-annex]](1)
# AUTHOR
Joey Hess <id@joeyh.name>

71
doc/git-remote-annex.mdwn Normal file
View file

@ -0,0 +1,71 @@
# NAME
git-remote-annex - remote helper program to store a git repository in a git-annex special remote
# SYNOPSIS
git fetch annex::uuid?param=value&param=value...
# DESCRIPTION
This is a git remote helper program that allows git to clone,
pull and push from a git repository that is stored in a git-annex
special remote.
The format of the remote URL is "annex::" followed by the UUID of the
special remote, and then followed by all of the configuration parameters of
the special remote.
For example, to clone from a directory special remote:
git clone annex::358ff77e-0bc3-11ef-bc49-872e6695c0e3?type=directory&encryption=none&directory=/mnt/foo/
When configuring the url of an existing special remote, a
shorter url of "annex::" is sufficient. For example:
git-annex initremote foo type=directory encryption=none directory=/mnt/foo
git config remote.foo.url annex::
git push foo master
Configuring the url like that is automatically done when cloning from a
special remote, but not by [[git-annex-initremote]](1) and
[[git-annex-enableremote]](1).
When a special remote needs some additional credentials to be provided,
they are not included in the URL, and need to be provided when cloning from
the special remote. That is typically done by setting environment
variables. Some special remotes may also need environment variables to be
set when pulling or pushing.
The git repository is stored in the special remote using special annex objects
with names starting with "GITMANIFEST--" and "GITBUNDLE--". For details about
how the git repository is stored, see
<https://git-annex.branchable.com/internals/git-remote-annex/>
Pushes to a special remote are usually done incrementally. However,
sometimes the whole git repository (but not the annex) needs to be
re-uploaded. That is done when deleting a ref from the remote. It's also
done when too many git bundles accumulate in the special remote, as
configured by the `remote.<name>.annex-max-git-bundles` git config.
Like any git repository, a git repository stored on a special remote can
have conflicting things pushed to it from different places. This mostly
works the same as any other git repository, eg a push that overwrites other
work will be prevented unless forced. However, it is possible, when
conflicting pushes are being done at the same time, for one of the pushes
to be overwritten by the other one. In this sitiation, the push will appear
to have succeeded, but pulling later will show the true situation.
# SEE ALSO
gitremote-helpers(1)
[[git-annex]](1)
[[git-annex-initremote]](1)
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -21,7 +21,7 @@ service, its first line is used as the authtoken.
# SEE ALSO
git-remote-helpers(1)
gitremote-helpers(1)
[[git-annex]](1)

View file

@ -2,6 +2,8 @@ In the world of git, we're not scared about internal implementation
details, and sometimes we like to dive in and tweak things by hand. Here's
some documentation to that end.
[[!toc ]]
## The .git/ directory
### `.git/annex/objects/aa/bb/*/*`
@ -364,3 +366,8 @@ of actual annexed files.
These trees are recorded in history of the git-annex branch, but the
head of the git-annex branch will never contain them.
## Other internals documentation
* [[git-remote-annex]] documents how git repositories are stored
on special remotes when using git with "annex::" urls.

View file

@ -0,0 +1,48 @@
The [[git-remote-annex|/git-remote-annex]] command allows pushing a git
repository to a special remote, and later cloning from it.
This adds two new key types to git-annex, GITMANIFEST and a GITBUNDLE.
GITMANIFEST--$UUID is the manifest for a git repository stored in the
git-annex repository with that UUID.
GITBUNDLE--$UUID-sha256 is a git bundle.
# format of the manifest file
An ordered list of bundle keys, one per line.
Additionally, there may be bundle keys that are prefixed with "-".
These keys are not part of the current content of the git remote
and are in the process of being deleted.
(Lines end with unix `"\n"`, not `"\r\n"`.)
# exporttree=yes remotes
In an exporttree=yes remote, the GITMANIFEST and GITBUNDLE objects are
stored in the remote, under the `.git/annex/objects/` path.
# multiple GITMANIFEST files
Usually there will only be one per special remote, but it's possible for
multiple special remotes to point to the same object storage, and if so
multiple GITMANIFEST objects can be stored.
This is why the UUID of the special remote is included in the GITMANIFEST
key, and in the annex:: uri.
# manually cloning from these files
If you are unable to use git-annex and need to clone a git repository
stored in such a special remote, this procedure will work:
* Find and download the GITMANIFEST
* Download each listed GITBUNDLE
* `git fetch` from each new bundle in order.
(Note that later bundles can update refs from the versions in previous
bundles.)
When the special remote is encrypted, the GITMANIFEST and GITBUNDLE will
also be encrypted. To decrypt those manually, see this
[[fairly simple shell script using standard tools|tips/Decrypting_files_in_special_remotes_without_git-annex]].

View file

@ -5,8 +5,7 @@ directory.
But, git-annex also extends git's concept of remotes, with these special
types of remotes. These can be used by git-annex to store and retrieve
the content of files. They cannot be used by other git commands, and
the git history is not stored in them.
the content of files.
* [[adb]] (for Android devices)
* [[Amazon_Glacier|glacier]]
@ -94,15 +93,25 @@ To initialize a new special remote, use
the special remote you want to use for details about configuration and
examples of how to initremote it.
Once a special remote has been initialize, other clones of the repository can
Once a special remote has been initialized, other clones of the repository can
also enable it, by using [[git-annex enableremote|git-annex-enableremote]]
with the same name that was used to initialize it. (Run the command without
any name to get a list of available special remotes.)
Initializing or enabling a special remote adds it as a remote of your git
repository. You can't use git commands like `git pull` with the remote
(usually, there are exceptions like [[git-lfs]]), but you can use git-annex
commands.
repository.
## Storing a git repository in a special remote
Most special remotes do not include a clone of the git repository
by default, so you can't use commands like `git push` and `git pull`
with them. (There are some exceptions like [[git-lfs]].)
But it is possible to store a git repository in many special remotes,
using the [[git-remote-annex]] command. This involves configuring
the remote with an "annex::" url. It's even possible to `git clone`
from a special remote using such an url. See the documentation of
[[git-remote-annex]] for details.
## Unused content on special remotes

View file

@ -1,24 +1,70 @@
git-remote-annex will be a program that allows push/pull of a git
repository to any git-annex special remote.
git-remote-annex will be a program that allows push/pull/clone of a git
repository to many types of git-annex special remote.
This is a redesign and reimplementation of git-remote-datalad-annex.
It will be a safer implementation, will support incremental pushes, and
will be available to users who don't use datalad.
Work is in the `git-remote-annex` branch, currently we have a design for
the core data files and operations.
<http://source.git-annex.branchable.com/?p=source.git;a=blob;f=doc/internals/git-remote-annex.mdwn;hb=git-remote-annex>
Also, that branch has a proof of concept implementation in a shell script.
Though it doesn't yet use special remotes at all, it is able to do
incremental pushes to git bundles with a manifest.
I still need to do some design work around using the git-annex branch to
detect concurrent push situations where changes to the manifest get lost,
and re-add those changes to it later.
Also, it's not clear what will happen when two people make conflicting pushes
to a ref, the goal would be to replicate git push to a regular git remote,
but that may not be entirely possible. This will need to be investigated
further.
--[[Joey]]
---
This is implememented and working. Remaining todo list for it:
* Test incremental push edge cases involving checkprereq.
* Cloning from an annex:: url with importtree=yes doesn't work
(with or without exporttree=yes). This is because the ContentIdentifier
db is not populated.
* Improve recovery from interrupted push by using outManifest to clean up
after it. (Requires populating outManifest.)
* See XXX in uploadManifest about recovering from a situation
where the remote is left with a deleted manifest when a push
is interrupted part way through. This should be recoverable
by caching the manifest locally and re-uploading it when
the remote has no manifest or prompting the user to merge and re-push.
* It would be nice if git-annex could generate an annex:: url
for a special remote and show it to the user, eg when
they have set the shorthand "annex::" url, so they know the full url.
`git-annex info $remote` could also display it.
Currently, the user has to remember how the special remote was
configured and replicate it all in the url.
There are some difficulties to doing this, including that
RemoteConfig can have hidden fields that should be omitted.
* initremote/enableremote could have an option that configures the url to a
special remote to a annex:: url. This would make it easier to use
git-remote-annex, since the user would not need to set up the url
themselves. (Also it would then avoid setting `skipFetchAll = true`)
* datalad-annex supports cloning from the web special remote,
using an url that contains the result of pushing to eg, a directory
special remote.
`datalad-annex::https://example.com?type=web&url={noquery}`
Supporting something like this would be good.
* Improve behavior in push races. A race can overwrite a change
to the MANIFEST and lose work that was pushed from the other repo.
From the user's perspective, that situation is the same as if one repo
pushed new work, then the other repo did a git push --force, overwriting
the first repo's push. In the first repo, another push will then fail as
a non fast-forward, and the user can recover as usual. This is probably
okish.
But.. a MANIFEST overwrite will leave bundle files in the remote that
are not listed in the MANIFEST. It seems likely that git-annex could
detect that after the fact and clean it up. Eg, if it caches
the last MANIFEST it uploaded, next time it downloads the MANIFEST
it can check if there are bundle files in the old one that are not
in the new one. If so, it can drop those bundle files from the remote.
* A push race can also appear to the user as if they pushed a ref, but then
it got deleted from the remote. This happens when two pushes are
pushing different ref names. This might be harder for the user to
notice; git fetch does not indicate that a remote ref got deleted.
They would have to use git fetch --prune to notice the deletion.
Once the user does notice, they can re-push their ref to recover.
Can this be improved?

View file

@ -580,6 +580,7 @@ Executable git-annex
Author
Backend
Backend.External
Backend.GitRemoteAnnex
Backend.Hash
Backend.URL
Backend.Utilities
@ -606,6 +607,7 @@ Executable git-annex
CmdLine.GitAnnexShell.Fields
CmdLine.AnnexSetter
CmdLine.Option
CmdLine.GitRemoteAnnex
CmdLine.GitRemoteTorAnnex
CmdLine.Seek
CmdLine.Usage
@ -758,6 +760,7 @@ Executable git-annex
Git.AutoCorrect
Git.Branch
Git.BuildVersion
Git.Bundle
Git.CatFile
Git.CheckAttr
Git.CheckIgnore
@ -939,6 +942,7 @@ Executable git-annex
Types.Export
Types.FileMatcher
Types.GitConfig
Types.GitRemoteAnnex
Types.Group
Types.Import
Types.IndexFiles

View file

@ -1,6 +1,6 @@
{- git-annex main program dispatch
-
- Copyright 2010-2016 Joey Hess <id@joeyh.name>
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -13,6 +13,7 @@ import Network.Socket (withSocketsDo)
import qualified CmdLine.GitAnnex
import qualified CmdLine.GitAnnexShell
import qualified CmdLine.GitRemoteAnnex
import qualified CmdLine.GitRemoteTorAnnex
import qualified Test
import qualified Benchmark
@ -35,6 +36,7 @@ main = sanitizeTopLevelExceptionMessages $ withSocketsDo $ do
where
run ps n = case takeFileName n of
"git-annex-shell" -> CmdLine.GitAnnexShell.run ps
"git-remote-annex" -> CmdLine.GitRemoteAnnex.run ps
"git-remote-tor-annex" -> CmdLine.GitRemoteTorAnnex.run ps
_ -> CmdLine.GitAnnex.run Test.optParser Test.runner Benchmark.mkGenerator ps