diff --git a/.gitignore b/.gitignore index 2d0859233f..e21cbf9c80 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ Build/BuildVersion Build/MakeMans git-annex git-annex-shell +git-remote-annex man git-union-merge git-union-merge.1 diff --git a/Annex/Branch.hs b/Annex/Branch.hs index bcc9ae114d..717cbc0400 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -727,7 +727,8 @@ stageJournal :: JournalLocked -> Annex () -> Annex () stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do prepareModifyIndex jl g <- gitRepo - let dir = gitAnnexJournalDir g + st <- getState + let dir = gitAnnexJournalDir st g (jlogf, jlogh) <- openjlog (fromRawFilePath tmpdir) withHashObjectHandle $ \h -> withJournalHandle gitAnnexJournalDir $ \jh -> diff --git a/Annex/Init.hs b/Annex/Init.hs index 842ccb9e27..2af4012d43 100644 --- a/Annex/Init.hs +++ b/Annex/Init.hs @@ -1,6 +1,6 @@ {- git-annex repository initialization - - - Copyright 2011-2022 Joey Hess + - Copyright 2011-2024 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -12,6 +12,7 @@ module Annex.Init ( checkInitializeAllowed, ensureInitialized, autoInitialize, + autoInitialize', isInitialized, initialize, initialize', @@ -256,10 +257,13 @@ guardSafeToUseRepo a = ifM (inRepo Git.Config.checkRepoConfigInaccessible) - Checks repository version and handles upgrades too. -} autoInitialize :: Annex [Remote] -> Annex () -autoInitialize remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade +autoInitialize = autoInitialize' autoInitializeAllowed + +autoInitialize' :: Annex Bool -> Annex [Remote] -> Annex () +autoInitialize' check remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade where needsinit = - whenM (initializeAllowed <&&> autoInitializeAllowed) $ do + whenM (initializeAllowed <&&> check) $ do initialize Nothing Nothing autoEnableSpecialRemotes remotelist diff --git a/Annex/Journal.hs b/Annex/Journal.hs index ea6327606d..54dd3317ef 100644 --- a/Annex/Journal.hs +++ b/Annex/Journal.hs @@ -7,7 +7,7 @@ - All files in the journal must be a series of lines separated by - newlines. - - - Copyright 2011-2022 Joey Hess + - Copyright 2011-2024 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -23,6 +23,8 @@ import qualified Git import Annex.Perms import Annex.Tmp import Annex.LockFile +import Annex.BranchState +import Types.BranchState import Utility.Directory.Stream import qualified Utility.RawFilePath as R @@ -82,9 +84,10 @@ privateUUIDsKnown' = not . S.null . annexPrivateRepos . Annex.gitconfig -} setJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex () setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do + st <- getState jd <- fromRepo =<< ifM (regardingPrivateUUID ru) - ( return gitAnnexPrivateJournalDir - , return gitAnnexJournalDir + ( return (gitAnnexPrivateJournalDir st) + , return (gitAnnexJournalDir st) ) -- journal file is written atomically let jfile = journalFile file @@ -106,9 +109,10 @@ newtype AppendableJournalFile = AppendableJournalFile (RawFilePath, RawFilePath) - branch. -} checkCanAppendJournalFile :: JournalLocked -> RegardingUUID -> RawFilePath -> Annex (Maybe AppendableJournalFile) checkCanAppendJournalFile _jl ru file = do + st <- getState jd <- fromRepo =<< ifM (regardingPrivateUUID ru) - ( return gitAnnexPrivateJournalDir - , return gitAnnexJournalDir + ( return (gitAnnexPrivateJournalDir st) + , return (gitAnnexJournalDir st) ) let jfile = jd P. journalFile file ifM (liftIO $ R.doesPathExist jfile) @@ -176,14 +180,12 @@ data GetPrivate = GetPrivate Bool -} getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent getJournalFileStale (GetPrivate getprivate) file = do - -- Optimisation to avoid a second MVar access. st <- Annex.getState id - let g = Annex.repo st liftIO $ if getprivate && privateUUIDsKnown' st then do - x <- getfrom (gitAnnexJournalDir g) - getfrom (gitAnnexPrivateJournalDir g) >>= \case + x <- getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st)) + getfrom (gitAnnexPrivateJournalDir (Annex.branchstate st) (Annex.repo st)) >>= \case Nothing -> return $ case x of Nothing -> NoJournalledContent Just b -> JournalledContent b @@ -193,7 +195,7 @@ getJournalFileStale (GetPrivate getprivate) file = do -- happens in a merge of two -- git-annex branches. Just x' -> x' <> y - else getfrom (gitAnnexJournalDir g) >>= return . \case + else getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st)) >>= return . \case Nothing -> NoJournalledContent Just b -> JournalledContent b where @@ -219,18 +221,20 @@ discardIncompleteAppend v {- List of existing journal files in a journal directory, but without locking, - may miss new ones just being added, or may have false positives if the - journal is staged as it is run. -} -getJournalledFilesStale :: (Git.Repo -> RawFilePath) -> Annex [RawFilePath] +getJournalledFilesStale :: (BranchState -> Git.Repo -> RawFilePath) -> Annex [RawFilePath] getJournalledFilesStale getjournaldir = do - g <- gitRepo - fs <- liftIO $ catchDefaultIO [] $ - getDirectoryContents $ fromRawFilePath (getjournaldir g) + st <- Annex.getState id + let d = getjournaldir (Annex.branchstate st) (Annex.repo st) + fs <- liftIO $ catchDefaultIO [] $ + getDirectoryContents (fromRawFilePath d) return $ filter (`notElem` [".", ".."]) $ map (fileJournal . toRawFilePath) fs {- Directory handle open on a journal directory. -} -withJournalHandle :: (Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a +withJournalHandle :: (BranchState -> Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a withJournalHandle getjournaldir a = do - d <- fromRepo getjournaldir + st <- Annex.getState id + let d = getjournaldir (Annex.branchstate st) (Annex.repo st) bracket (opendir d) (liftIO . closeDirectory) (liftIO . a) where -- avoid overhead of creating the journal directory when it already @@ -239,9 +243,10 @@ withJournalHandle getjournaldir a = do `catchIO` (const (createAnnexDirectory d >> opendir d)) {- Checks if there are changes in the journal. -} -journalDirty :: (Git.Repo -> RawFilePath) -> Annex Bool +journalDirty :: (BranchState -> Git.Repo -> RawFilePath) -> Annex Bool journalDirty getjournaldir = do - d <- fromRawFilePath <$> fromRepo getjournaldir + st <- getState + d <- fromRawFilePath <$> fromRepo (getjournaldir st) liftIO $ (not <$> isDirectoryEmpty d) `catchIO` (const $ doesDirectoryExist d) diff --git a/Annex/Locations.hs b/Annex/Locations.hs index 9b465dce8d..ee5b6d690f 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -118,6 +118,7 @@ import Key import Types.UUID import Types.GitConfig import Types.Difference +import Types.BranchState import qualified Git import qualified Git.Types as Git import Git.FilePath @@ -528,15 +529,19 @@ gitAnnexTransferDir r = {- .git/annex/journal/ is used to journal changes made to the git-annex - branch -} -gitAnnexJournalDir :: Git.Repo -> RawFilePath -gitAnnexJournalDir r = - P.addTrailingPathSeparator $ gitAnnexDir r P. "journal" +gitAnnexJournalDir :: BranchState -> Git.Repo -> RawFilePath +gitAnnexJournalDir st r = P.addTrailingPathSeparator $ + case alternateJournal st of + Nothing -> gitAnnexDir r P. "journal" + Just d -> d {- .git/annex/journal.private/ is used to journal changes regarding private - repositories. -} -gitAnnexPrivateJournalDir :: Git.Repo -> RawFilePath -gitAnnexPrivateJournalDir r = - P.addTrailingPathSeparator $ gitAnnexDir r P. "journal-private" +gitAnnexPrivateJournalDir :: BranchState -> Git.Repo -> RawFilePath +gitAnnexPrivateJournalDir st r = P.addTrailingPathSeparator $ + case alternateJournal st of + Nothing -> gitAnnexDir r P. "journal-private" + Just d -> d {- Lock file for the journal. -} gitAnnexJournalLock :: Git.Repo -> RawFilePath diff --git a/Annex/SpecialRemote/Config.hs b/Annex/SpecialRemote/Config.hs index fff2c88c1d..7fbd0d4191 100644 --- a/Annex/SpecialRemote/Config.hs +++ b/Annex/SpecialRemote/Config.hs @@ -1,6 +1,6 @@ {- git-annex special remote configuration - - - Copyright 2019-2023 Joey Hess + - Copyright 2019-2024 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} diff --git a/Backend/GitRemoteAnnex.hs b/Backend/GitRemoteAnnex.hs new file mode 100644 index 0000000000..84da8aee44 --- /dev/null +++ b/Backend/GitRemoteAnnex.hs @@ -0,0 +1,104 @@ +{- Backends for git-remote-annex. + - + - GITBUNDLE keys store git bundles + - GITMANIFEST keys store ordered lists of GITBUNDLE keys + - + - Copyright 2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Backend.GitRemoteAnnex ( + backends, + genGitBundleKey, + genManifestKey, + isGitRemoteAnnexKey, +) where + +import Annex.Common +import Types.Key +import Types.Backend +import Utility.Hash +import Utility.Metered +import qualified Backend.Hash as Hash + +import qualified Data.ByteString.Short as S +import qualified Data.ByteString.Char8 as B8 + +backends :: [Backend] +backends = [gitbundle, gitmanifest] + +gitbundle :: Backend +gitbundle = Backend + { backendVariety = GitBundleKey + , genKey = Nothing + -- ^ Not provided because these keys can only be generated by + -- git-remote-annex. + , verifyKeyContent = Just $ Hash.checkKeyChecksum sameCheckSum hash + , verifyKeyContentIncrementally = Just (liftIO . incrementalVerifier) + , canUpgradeKey = Nothing + , fastMigrate = Nothing + , isStableKey = const True + , isCryptographicallySecure = Hash.cryptographicallySecure hash + , isCryptographicallySecureKey = const $ pure $ + Hash.cryptographicallySecure hash + } + +gitmanifest :: Backend +gitmanifest = Backend + { backendVariety = GitManifestKey + , genKey = Nothing + , verifyKeyContent = Nothing + , verifyKeyContentIncrementally = Nothing + , canUpgradeKey = Nothing + , fastMigrate = Nothing + , isStableKey = const True + , isCryptographicallySecure = False + , isCryptographicallySecureKey = const $ pure False + } + +-- git bundle keys use the sha256 hash. +hash :: Hash.Hash +hash = Hash.SHA2Hash (HashSize 256) + +incrementalVerifier :: Key -> IO IncrementalVerifier +incrementalVerifier = + mkIncrementalVerifier sha2_256_context "checksum" . sameCheckSum + +sameCheckSum :: Key -> String -> Bool +sameCheckSum key s = s == expected + where + -- The checksum comes after a UUID. + expected = reverse $ takeWhile (/= '-') $ reverse $ + decodeBS $ S.fromShort $ fromKey keyName key + +genGitBundleKey :: UUID -> RawFilePath -> MeterUpdate -> Annex Key +genGitBundleKey remoteuuid file meterupdate = do + filesize <- liftIO $ getFileSize file + s <- Hash.hashFile hash file meterupdate + return $ mkKey $ \k -> k + { keyName = S.toShort $ fromUUID remoteuuid <> "-" <> encodeBS s + , keyVariety = GitBundleKey + , keySize = Just filesize + } + +genManifestKey :: UUID -> Key +genManifestKey u = mkKey $ \kd -> kd + { keyName = S.toShort (fromUUID u) + , keyVariety = GitManifestKey + } + +{- Is the key a manifest or bundle key that belongs to the special remote + - with this uuid? -} +isGitRemoteAnnexKey :: UUID -> Key -> Bool +isGitRemoteAnnexKey u k = + case fromKey keyVariety k of + GitBundleKey -> sameuuid $ + -- Remove the checksum that comes after the UUID. + B8.dropEnd 1 . B8.dropWhileEnd (/= '-') + GitManifestKey -> sameuuid id + _ -> False + where + sameuuid f = fromUUID u == f (S.fromShort (fromKey keyName k)) diff --git a/Backend/Hash.hs b/Backend/Hash.hs index 16b3b56c72..d4d84db0d4 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -1,6 +1,6 @@ {- git-annex hashing backends - - - Copyright 2011-2021 Joey Hess + - Copyright 2011-2024 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -12,6 +12,10 @@ module Backend.Hash ( testKeyBackend, keyHash, descChecksum, + Hash(..), + cryptographicallySecure, + hashFile, + checkKeyChecksum ) where import Annex.Common @@ -77,7 +81,7 @@ genBackend :: Hash -> Backend genBackend hash = Backend { backendVariety = hashKeyVariety hash (HasExt False) , genKey = Just (keyValue hash) - , verifyKeyContent = Just $ checkKeyChecksum hash + , verifyKeyContent = Just $ checkKeyChecksum sameCheckSum hash , verifyKeyContentIncrementally = Just $ checkKeyChecksumIncremental hash , canUpgradeKey = Just needsUpgrade , fastMigrate = Just trivialMigrate @@ -122,10 +126,10 @@ keyValueE hash source meterupdate = keyValue hash source meterupdate >>= addE source (const $ hashKeyVariety hash (HasExt True)) -checkKeyChecksum :: Hash -> Key -> RawFilePath -> Annex Bool -checkKeyChecksum hash key file = catchIOErrorType HardwareFault hwfault $ do +checkKeyChecksum :: (Key -> String -> Bool) -> Hash -> Key -> RawFilePath -> Annex Bool +checkKeyChecksum issame hash key file = catchIOErrorType HardwareFault hwfault $ do showAction (UnquotedString descChecksum) - sameCheckSum key + issame key <$> hashFile hash file nullMeterUpdate where hwfault e = do diff --git a/Backend/Variety.hs b/Backend/Variety.hs index b4da6f2a96..a48933c88a 100644 --- a/Backend/Variety.hs +++ b/Backend/Variety.hs @@ -18,12 +18,14 @@ import qualified Backend.External import qualified Backend.Hash import qualified Backend.WORM import qualified Backend.URL +import qualified Backend.GitRemoteAnnex {- Regular backends. Does not include externals or VURL. -} regularBackendList :: [Backend] regularBackendList = Backend.Hash.backends ++ Backend.WORM.backends ++ Backend.URL.backends + ++ Backend.GitRemoteAnnex.backends {- The default hashing backend. -} defaultHashBackend :: Backend diff --git a/Build/Standalone.hs b/Build/Standalone.hs index 98c3b2cc89..367527430a 100644 --- a/Build/Standalone.hs +++ b/Build/Standalone.hs @@ -224,6 +224,7 @@ installGitAnnex topdir = go (topdir "bin") error "strip failed" createSymbolicLink "git-annex" (bindir "git-annex-shell") createSymbolicLink "git-annex" (bindir "git-remote-tor-annex") + createSymbolicLink "git-annex" (bindir "git-remote-annex") main :: IO () main = getArgs >>= go diff --git a/CHANGELOG b/CHANGELOG index b3c302ca17..075cb9a5a9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,8 @@ git-annex (10.20240431) UNRELEASED; urgency=medium + * git-remote-annex: New program which allows pushing a git repo to a + git-annex special remote, and cloning from a special remote. + (Based on Michael Hanke's git-remote-datalad-annex.) * fsck: Fix recent reversion that made it say it was checksumming files whose content is not present. * Avoid the --fast option preventing checksumming in some cases it diff --git a/CmdLine/GitRemoteAnnex.hs b/CmdLine/GitRemoteAnnex.hs new file mode 100644 index 0000000000..d1eac6dfd8 --- /dev/null +++ b/CmdLine/GitRemoteAnnex.hs @@ -0,0 +1,951 @@ +{- git-remote-annex program + - + - Copyright 2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module CmdLine.GitRemoteAnnex where + +import Annex.Common +import Types.GitRemoteAnnex +import qualified Annex +import qualified Remote +import qualified Git.CurrentRepo +import qualified Git.Ref +import qualified Git.Branch +import qualified Git.Bundle +import qualified Git.Remote +import qualified Git.Remote.Remove +import qualified Annex.SpecialRemote as SpecialRemote +import qualified Annex.Branch +import qualified Annex.BranchState +import qualified Types.Remote as Remote +import qualified Logs.Remote +import Remote.Helper.Encryptable (parseEncryptionMethod) +import Annex.Transfer +import Backend.GitRemoteAnnex +import Config +import Types.Key +import Types.RemoteConfig +import Types.ProposedAccepted +import Types.Export +import Types.GitConfig +import Types.BranchState +import Types.Difference +import Types.Crypto +import Git.Types +import Logs.Difference +import Annex.Init +import Annex.UUID +import Annex.Content +import Annex.Perms +import Annex.SpecialRemote.Config +import Remote.List +import Remote.List.Util +import Utility.Tmp +import Utility.Tmp.Dir +import Utility.Env +import Utility.Metered + +import Network.URI +import Data.Either +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as B8 +import qualified Data.Map.Strict as M +import qualified System.FilePath.ByteString as P +import qualified Utility.RawFilePath as R +import qualified Data.Set as S + +run :: [String] -> IO () +run (remotename:url:[]) = + -- git strips the "annex::" prefix of the url + -- when running this command, so add it back + let url' = "annex::" ++ url + in case parseSpecialRemoteNameUrl remotename url' of + Left e -> giveup e + Right src -> do + repo <- getRepo + state <- Annex.new repo + Annex.eval state (run' src) +run (_remotename:[]) = giveup "remote url not configured" +run _ = giveup "expected remote name and url parameters" + +run' :: SpecialRemoteConfig -> Annex () +run' src = do + sab <- startAnnexBranch + -- Prevent any usual git-annex output to stdout, because + -- the output of this command is being parsed by git. + doQuietAction $ + withSpecialRemote src sab $ \rmt -> do + ls <- lines <$> liftIO getContents + go rmt ls emptyState + where + go rmt (l:ls) st = + let (c, v) = splitLine l + in case c of + "capabilities" -> capabilities >> go rmt ls st + "list" -> case v of + "" -> list st rmt False >>= go rmt ls + "for-push" -> list st rmt True >>= go rmt ls + _ -> protocolError l + "fetch" -> fetch st rmt (l:ls) + >>= \ls' -> go rmt ls' st + "push" -> push st rmt (l:ls) + >>= \(ls', st') -> go rmt ls' st' + "" -> return () + _ -> protocolError l + go _ [] _ = return () + +data State = State + { manifestCache :: Maybe Manifest + , trackingRefs :: M.Map Ref Sha + } + +emptyState :: State +emptyState = State + { manifestCache = Nothing + , trackingRefs = mempty + } + +protocolError :: String -> a +protocolError l = giveup $ "gitremote-helpers protocol error at " ++ show l + +capabilities :: Annex () +capabilities = do + liftIO $ putStrLn "fetch" + liftIO $ putStrLn "push" + liftIO $ putStrLn "" + liftIO $ hFlush stdout + +list :: State -> Remote -> Bool -> Annex State +list st rmt forpush = do + manifest <- if forpush + then downloadManifestWhenPresent rmt + else downloadManifestOrFail rmt + l <- forM (inManifest manifest) $ \k -> do + b <- downloadGitBundle rmt k + heads <- inRepo $ Git.Bundle.listHeads b + -- Get all the objects from the bundle. This is done here + -- so that the tracking refs can be updated with what is + -- listed, and so what when a full repush is done, all + -- objects are available to be pushed. + when forpush $ + inRepo $ Git.Bundle.unbundle b + -- The bundle may contain tracking refs, or regular refs, + -- make sure we're operating on regular refs. + return $ map (\(s, r) -> (fromTrackingRef rmt r, s)) heads + + -- Later refs replace earlier refs with the same name. + let refmap = M.fromList $ concat l + let reflist = M.toList refmap + let trackingrefmap = M.mapKeys (toTrackingRef rmt) refmap + + -- When listing for a push, update the tracking refs to match what + -- was listed. This is necessary in order for a full repush to know + -- what to push. + when forpush $ + updateTrackingRefs True rmt trackingrefmap + + -- Respond to git with a list of refs. + liftIO $ do + forM_ reflist $ \(ref, sha) -> + B8.putStrLn $ fromRef' sha <> " " <> fromRef' ref + -- Newline terminates list of refs. + putStrLn "" + hFlush stdout + + -- Remember the tracking refs and manifest. + return $ st + { manifestCache = Just manifest + , trackingRefs = trackingrefmap + } + +-- Any number of fetch commands can be sent by git, asking for specific +-- things. We fetch everything new at once, so find the end of the fetch +-- commands (which is supposed to be a blank line) before fetching. +fetch :: State -> Remote -> [String] -> Annex [String] +fetch st rmt (l:ls) = case splitLine l of + ("fetch", _) -> fetch st rmt ls + ("", _) -> do + fetch' st rmt + return ls + _ -> do + fetch' st rmt + return (l:ls) +fetch st rmt [] = do + fetch' st rmt + return [] + +fetch' :: State -> Remote -> Annex () +fetch' st rmt = do + manifest <- maybe (downloadManifestOrFail rmt) pure (manifestCache st) + forM_ (inManifest manifest) $ \k -> + downloadGitBundle rmt k >>= inRepo . Git.Bundle.unbundle + -- Newline indicates end of fetch. + liftIO $ do + putStrLn "" + hFlush stdout + +-- Note that the git bundles that are generated to push contain +-- tracking refs, rather than the actual refs that the user requested to +-- push. This is done because git bundle does not allow creating a bundle +-- that contains refs with different names than the ones in the git +-- repository. Consider eg, git push remote foo:bar, where the destination +-- ref is bar, but there may be no bar ref locally, or the bar ref may +-- be different than foo. If git bundle supported GIT_NAMESPACE, it would +-- be possible to generate a bundle that contains the specified refs. +push :: State -> Remote -> [String] -> Annex ([String], State) +push st rmt ls = do + let (refspecs, ls') = collectRefSpecs ls + (responses, trackingrefs) <- calc refspecs ([], trackingRefs st) + updateTrackingRefs False rmt trackingrefs + (ok, st') <- if M.null trackingrefs + then pushEmpty st rmt + else if any forcedPush refspecs + then fullPush st rmt (M.keys trackingrefs) + else incrementalPush st rmt + (trackingRefs st) trackingrefs + if ok + then do + sendresponses responses + return (ls', st' { trackingRefs = trackingrefs }) + else do + -- Restore the old tracking refs + updateTrackingRefs True rmt (trackingRefs st) + sendresponses $ + map (const "error push failed") refspecs + return (ls', st') + where + calc + :: [RefSpec] + -> ([B.ByteString], M.Map Ref Sha) + -> Annex ([B.ByteString], M.Map Ref Sha) + calc [] (responses, trackingrefs) = + return (reverse responses, trackingrefs) + calc (r:rs) (responses, trackingrefs) = + let tr = toTrackingRef rmt (dstRef r) + okresp m = pure + ( ("ok " <> fromRef' (dstRef r)):responses + , m + ) + errresp msg = pure + ( ("error " <> fromRef' (dstRef r) <> " " <> msg):responses + , trackingrefs + ) + in calc rs =<< case srcRef r of + Just srcref + | forcedPush r -> okresp $ + M.insert tr srcref trackingrefs + | otherwise -> ifM (isfastforward srcref tr) + ( okresp $ + M.insert tr srcref trackingrefs + , errresp "non-fast-forward" + ) + Nothing -> okresp $ M.delete tr trackingrefs + + -- Check if the push is a fast-forward that will not overwrite work + -- in the ref currently stored in the remote. This seems redundant + -- to git's own checking for non-fast-forwards. But unfortunately, + -- before git push checks that, it actually tells us to push. + -- That seems likely to be a bug in git, and this is a workaround. + isfastforward newref tr = case M.lookup tr (trackingRefs st) of + Just prevsha -> inRepo $ Git.Ref.isAncestor prevsha newref + Nothing -> pure True + + -- Send responses followed by newline to indicate end of push. + sendresponses responses = liftIO $ do + mapM_ B8.putStrLn responses + putStrLn "" + hFlush stdout + +-- Full push of the specified refs to the remote. +-- All git bundle objects listed in the old manifest will be +-- deleted after successful upload of the new git bundle and manifest. +-- +-- If this is interrupted, or loses access to the remote mid way through, it +-- will leave the remote with unused bundle keys on it, but every bundle +-- key listed in the manifest will exist, so it's in a consistent, usable +-- state. +-- +-- However, the manifest is replaced by first dropping the object and then +-- uploading a new one. Interrupting that will leave the remote without a +-- manifest, which will appear as if all tracking branches were deleted +-- from it. +fullPush :: State -> Remote -> [Ref] -> Annex (Bool, State) +fullPush st rmt refs = guardPush st $ do + oldmanifest <- maybe (downloadManifestWhenPresent rmt) pure + (manifestCache st) + fullPush' oldmanifest st rmt refs + +fullPush' :: Manifest -> State -> Remote -> [Ref] -> Annex (Bool, State) +fullPush' oldmanifest st rmt refs =do + let bs = map Git.Bundle.fullBundleSpec refs + bundlekey <- generateAndUploadGitBundle rmt bs oldmanifest + uploadManifest rmt (mkManifest [bundlekey] []) + ok <- allM (dropKey rmt) $ + filter (/= bundlekey) (inManifest oldmanifest) + return (ok, st { manifestCache = Nothing }) + +guardPush :: State -> Annex (Bool, State) -> Annex (Bool, State) +guardPush st a = catchNonAsync a $ \ex -> do + liftIO $ hPutStrLn stderr $ + "Push failed (" ++ show ex ++ ")" + return (False, st { manifestCache = Nothing }) + +-- Incremental push of only the refs that changed. +-- +-- No refs were deleted (that causes a fullPush), but new refs may +-- have been added. +incrementalPush :: State -> Remote -> M.Map Ref Sha -> M.Map Ref Sha -> Annex (Bool, State) +incrementalPush st rmt oldtrackingrefs newtrackingrefs = guardPush st $ do + oldmanifest <- maybe (downloadManifestWhenPresent rmt) pure (manifestCache st) + if length (inManifest oldmanifest) + 1 > remoteAnnexMaxGitBundles (Remote.gitconfig rmt) + then fullPush' oldmanifest st rmt (M.keys newtrackingrefs) + else go oldmanifest + where + go oldmanifest = do + bs <- calc [] (M.toList newtrackingrefs) + bundlekey <- generateAndUploadGitBundle rmt bs oldmanifest + uploadManifest rmt (oldmanifest <> mkManifest [bundlekey] []) + return (True, st { manifestCache = Nothing }) + + calc c [] = return (reverse c) + calc c ((ref, sha):refs) = case M.lookup ref oldtrackingrefs of + Just oldsha + | oldsha == sha -> calc c refs -- unchanged + | otherwise -> + ifM (inRepo $ Git.Ref.isAncestor oldsha ref) + ( use $ checkprereq oldsha ref + , use $ findotherprereq ref sha + ) + Nothing -> use $ findotherprereq ref sha + where + use a = do + bs <- a + calc (bs:c) refs + + -- Unfortunately, git bundle will let a prerequisite specified + -- for one ref prevent it including another ref. For example, + -- where x is a ref that points at A, and y is a ref that points at + -- B (which has A as its parent), git bundle x A..y + -- will omit including the x ref in the bundle at all. + -- + -- But we need to include all (changed) refs that the user + -- specified to push in the bundle. So, only include the sha + -- as a prerequisite when it will not prevent including another + -- changed ref in the bundle. + checkprereq prereq ref = + ifM (anyM shadows $ M.elems $ M.delete ref changedrefs) + ( pure $ Git.Bundle.fullBundleSpec ref + , pure $ Git.Bundle.BundleSpec + { Git.Bundle.preRequisiteRef = Just prereq + , Git.Bundle.includeRef = ref + } + ) + where + shadows s + | s == prereq = pure True + | otherwise = inRepo $ Git.Ref.isAncestor s prereq + changedrefs = M.differenceWith + (\a b -> if a == b then Nothing else Just a) + newtrackingrefs oldtrackingrefs + + -- When the old tracking ref is not able to be used as a + -- prerequisite, this to find some other ref that was previously + -- pushed that can be used as a prerequisite instead. This can + -- optimise the bundle size a bit in edge cases. + -- + -- For example, a forced push of branch foo that resets it back + -- several commits can use a previously pushed bar as a prerequisite + -- if it's an ancestor of foo. + findotherprereq ref sha = + findotherprereq' ref sha (M.elems oldtrackingrefs) + findotherprereq' ref _ [] = pure (Git.Bundle.fullBundleSpec ref) + findotherprereq' ref sha (l:ls) + | l == sha = findotherprereq' ref sha ls + | otherwise = ifM (inRepo $ Git.Ref.isAncestor l ref) + ( checkprereq l ref + , findotherprereq' ref sha ls + ) + +-- When the push deletes all refs from the remote, upload an empty +-- manifest and then drop all bundles that were listed in the manifest. +-- The manifest is emptied first so if this is interrupted, only +-- unused bundles will remain in the remote, rather than leaving the +-- remote with a manifest that refers to missing bundles. +pushEmpty :: State -> Remote -> Annex (Bool, State) +pushEmpty st rmt = do + manifest <- maybe (downloadManifestWhenPresent rmt) pure + (manifestCache st) + uploadManifest rmt mempty + ok <- allM (dropKey rmt) + (genManifestKey (Remote.uuid rmt) : inManifest manifest) + return (ok, st { manifestCache = Nothing }) + +data RefSpec = RefSpec + { forcedPush :: Bool + , srcRef :: Maybe Ref -- ^ Nothing when deleting a ref + , dstRef :: Ref + } + deriving (Show) + +-- Any number of push commands can be sent by git, specifying the refspecs +-- to push. They should be followed by a blank line. +collectRefSpecs :: [String] -> ([RefSpec], [String]) +collectRefSpecs = go [] + where + go c (l:ls) = case splitLine l of + ("push", refspec) -> go (parseRefSpec refspec:c) ls + ("", _) -> (c, ls) + _ -> (c, (l:ls)) + go c [] = (c, []) + +parseRefSpec :: String -> RefSpec +parseRefSpec ('+':s) = (parseRefSpec s) { forcedPush = True } +parseRefSpec s = + let (src, cdst) = break (== ':') s + dst = if null cdst then cdst else drop 1 cdst + deletesrc = null src + in RefSpec + -- To delete a ref, have to do a force push of all + -- remaining refs. + { forcedPush = deletesrc + , srcRef = if deletesrc + then Nothing + else Just (Ref (encodeBS src)) + , dstRef = Ref (encodeBS dst) + } + +-- "foo bar" to ("foo", "bar") +-- "foo" to ("foo", "") +splitLine :: String -> (String, String) +splitLine l = + let (c, sv) = break (== ' ') l + v = if null sv then sv else drop 1 sv + in (c, v) + +data SpecialRemoteConfig + = SpecialRemoteConfig + { specialRemoteUUID :: UUID + , specialRemoteConfig :: RemoteConfig + , specialRemoteName :: Maybe RemoteName + , specialRemoteUrl :: String + } + | ExistingSpecialRemote RemoteName + deriving (Show) + +-- The url for a special remote looks like +-- "annex::uuid?param=value¶m=value..." +-- +-- Also accept an url of "annex::", when a remote name is provided, +-- to use an already enabled special remote. +parseSpecialRemoteNameUrl :: String -> String -> Either String SpecialRemoteConfig +parseSpecialRemoteNameUrl remotename url + | url == "annex::" && remotename /= url = Right $ + ExistingSpecialRemote remotename + | "annex::" `isPrefixOf` remotename = parseSpecialRemoteUrl url Nothing + | otherwise = parseSpecialRemoteUrl url (Just remotename) + +parseSpecialRemoteUrl :: String -> Maybe RemoteName -> Either String SpecialRemoteConfig +parseSpecialRemoteUrl url remotename = case parseURI url of + Nothing -> Left "URL parse failed" + Just u -> case uriScheme u of + "annex:" -> case uriPath u of + "" -> Left "annex: URL did not include a UUID" + (':':p) + | null p -> Left "annex: URL did not include a UUID" + | otherwise -> Right $ SpecialRemoteConfig + { specialRemoteUUID = toUUID p + , specialRemoteConfig = parsequery u + , specialRemoteName = remotename + , specialRemoteUrl = url + } + _ -> Left "annex: URL malformed" + _ -> Left "Not an annex: URL" + where + parsequery u = M.fromList $ + map parsekv $ splitc '&' (drop 1 (uriQuery u)) + parsekv kv = + let (k, sv) = break (== '=') kv + v = if null sv then sv else drop 1 sv + in (Proposed (unEscapeString k), Proposed (unEscapeString v)) + +-- Runs an action with a Remote as specified by the SpecialRemoteConfig. +withSpecialRemote :: SpecialRemoteConfig -> StartAnnexBranch -> (Remote -> Annex a) -> Annex a +withSpecialRemote (ExistingSpecialRemote remotename) _ a = + getEnabledSpecialRemoteByName remotename >>= + maybe (giveup $ "There is no special remote named " ++ remotename) + a +withSpecialRemote cfg@(SpecialRemoteConfig {}) sab a = case specialRemoteName cfg of + -- The name could be the name of an existing special remote, + -- if so use it as long as its UUID matches the UUID from the url. + Just remotename -> getEnabledSpecialRemoteByName remotename >>= \case + Just rmt + | Remote.uuid rmt == specialRemoteUUID cfg -> a rmt + | otherwise -> giveup $ "The uuid in the annex:: url does not match the uuid of the remote named " ++ remotename + -- When cloning from an annex:: url, + -- this is used to set up the origin remote. + Nothing -> specialRemoteFromUrl sab + (initremote remotename >>= a) + Nothing -> specialRemoteFromUrl sab inittempremote + where + -- Initialize a new special remote with the provided configuration + -- and name. + initremote remotename = do + let c = M.insert SpecialRemote.nameField (Proposed remotename) + (specialRemoteConfig cfg) + t <- either giveup return (SpecialRemote.findType c) + dummycfg <- liftIO dummyRemoteGitConfig + (c', u) <- Remote.setup t Remote.Init (Just (specialRemoteUUID cfg)) + Nothing c dummycfg + `onException` cleanupremote remotename + Logs.Remote.configSet u c' + setConfig (remoteConfig c' "url") (specialRemoteUrl cfg) + remotesChanged + getEnabledSpecialRemoteByName remotename >>= \case + Just rmt -> case checkSpecialRemoteProblems rmt of + Nothing -> return rmt + Just problem -> do + cleanupremote remotename + giveup problem + Nothing -> do + cleanupremote remotename + giveup "Unable to find special remote after setup." + + -- Temporarily initialize a special remote, and remove it after + -- the action is run. + inittempremote = + let remotename = Git.Remote.makeLegalName $ + "annex-temp-" ++ fromUUID (specialRemoteUUID cfg) + in bracket + (initremote remotename) + (const $ cleanupremote remotename) + a + + cleanupremote remotename = do + l <- inRepo Git.Remote.listRemotes + when (remotename `elem` l) $ + inRepo $ Git.Remote.Remove.remove remotename + +-- When a special remote has already been enabled, just use it. +getEnabledSpecialRemoteByName :: RemoteName -> Annex (Maybe Remote) +getEnabledSpecialRemoteByName remotename = + Remote.byNameOnly remotename >>= \case + Nothing -> return Nothing + Just rmt -> + maybe (return (Just rmt)) giveup + (checkSpecialRemoteProblems rmt) + +parseManifest :: B.ByteString -> Either String Manifest +parseManifest b = + let (outks, inks) = partitionEithers $ map parseline $ B8.lines b + in case (checkvalid [] inks, checkvalid [] outks) of + (Right inks', Right outks') -> + Right $ mkManifest inks' outks' + (Left err, _) -> Left err + (_, Left err) -> Left err + where + parseline l + | "-" `B.isPrefixOf` l = + Left $ deserializeKey' $ B.drop 1 l + | otherwise = + Right $ deserializeKey' l + + checkvalid c [] = Right (reverse c) + checkvalid c (Just k:ks) = case fromKey keyVariety k of + GitBundleKey -> checkvalid (k:c) ks + _ -> Left $ "Wrong type of key in manifest " ++ serializeKey k + checkvalid _ (Nothing:_) = + Left "Error parsing manifest" + +-- Avoid using special remotes that are thirdparty populated, because +-- there is no way to push the git repository keys into one. +-- +-- XXX Avoid using special remotes that are encrypted by key +-- material stored in the git repository, since that would present a +-- chicken and egg problem when cloning. +checkSpecialRemoteProblems :: Remote -> Maybe String +checkSpecialRemoteProblems rmt + | Remote.thirdPartyPopulated (Remote.remotetype rmt) = + Just $ "Cannot use this thirdparty-populated special" + ++ " remote as a git remote." + | importTree (Remote.config rmt) = + Just $ "Using importtree=yes special remotes as git remotes" + ++ " is not yet supported." + | parseEncryptionMethod (unparsedRemoteConfig (Remote.config rmt)) /= Right NoneEncryption + && not (remoteAnnexAllowEncryptedGitRepo (Remote.gitconfig rmt)) = + Just $ "Using an encrypted special remote as a git" + ++ " remote makes it impossible to clone" + ++ " from it. If you will never need to" + ++ " clone from this remote, set: git config " + ++ decodeBS allowencryptedgitrepo ++ " true" + | otherwise = Nothing + where + ConfigKey allowencryptedgitrepo = remoteAnnexConfig rmt "allow-encrypted-gitrepo" + +-- Downloads the Manifest when present in the remote. When not present, +-- returns an empty Manifest. +downloadManifestWhenPresent :: Remote -> Annex Manifest +downloadManifestWhenPresent rmt = fromMaybe mempty <$> downloadManifest rmt + +-- Downloads the Manifest, or fails if the remote does not contain it. +downloadManifestOrFail :: Remote -> Annex Manifest +downloadManifestOrFail rmt = + maybe (giveup "No git repository found in this remote.") return + =<< downloadManifest rmt + +-- Downloads the Manifest or Nothing if the remote does not contain a +-- manifest. +-- +-- Throws errors if the remote cannot be accessed or the download fails, +-- or if the manifest file cannot be parsed. +downloadManifest :: Remote -> Annex (Maybe Manifest) +downloadManifest rmt = getKeyExportLocations rmt mk >>= \case + Nothing -> ifM (Remote.checkPresent rmt mk) + ( gettotmp $ \tmp -> + Remote.retrieveKeyFile rmt mk + (AssociatedFile Nothing) tmp + nullMeterUpdate Remote.NoVerify + , return Nothing + ) + Just locs -> getexport locs + where + mk = genManifestKey (Remote.uuid rmt) + + -- Downloads to a temporary file, rather than using eg + -- Annex.Transfer.download that would put it in the object + -- directory. The content of manifests is not stable, and so + -- it needs to re-download it fresh every time, and the object + -- file should not be stored locally. + gettotmp dl = withTmpFile "GITMANIFEST" $ \tmp tmph -> do + liftIO $ hClose tmph + _ <- dl tmp + b <- liftIO (B.readFile tmp) + case parseManifest b of + Right m -> return (Just m) + Left err -> giveup err + + getexport [] = return Nothing + getexport (loc:locs) = + ifM (Remote.checkPresentExport (Remote.exportActions rmt) mk loc) + ( gettotmp $ \tmp -> + Remote.retrieveExport (Remote.exportActions rmt) + mk loc tmp nullMeterUpdate + , getexport locs + ) + +-- Uploads the Manifest to the remote. +-- +-- Throws errors if the remote cannot be accessed or the upload fails. +-- +-- The manifest key is first dropped from the remote, then the new +-- content is uploaded. This is necessary because the same key is used, +-- and behavior of remotes is undefined when sending a key that is +-- already present on the remote, but with different content. +-- +-- Note that if this is interrupted or loses access to the remote part +-- way through, it may leave the remote without a manifest file. That will +-- appear as if all refs have been deleted from the remote. +-- XXX It should be possible to remember when that happened, by writing +-- state to a file before, and then the next time git-remote-annex is run, it +-- could recover from the situation. +uploadManifest :: Remote -> Manifest -> Annex () +uploadManifest rmt manifest = + withTmpFile "GITMANIFEST" $ \tmp tmph -> do + liftIO $ forM_ (inManifest manifest) $ \bundlekey -> + B8.hPutStrLn tmph (serializeKey' bundlekey) + liftIO $ hClose tmph + -- Remove old manifest if present. + dropKey' rmt mk + -- storeKey needs the key to be in the annex objects + -- directory, so put the manifest file there temporarily. + -- Using linkOrCopy rather than moveAnnex to avoid updating + -- InodeCache database. Also, works even when the repository + -- is configured to require only cryptographically secure + -- keys, which it is not. + objfile <- calcRepo (gitAnnexLocation mk) + res <- modifyContentDir objfile $ + linkOrCopy mk (toRawFilePath tmp) objfile Nothing + unless (isJust res) + uploadfailed + ok <- (uploadGitObject rmt mk >> pure True) + `catchNonAsync` (const (pure False)) + -- Don't leave the manifest key in the annex objects + -- directory. + unlinkAnnex mk + unless ok + uploadfailed + where + mk = genManifestKey (Remote.uuid rmt) + uploadfailed = giveup $ "Failed to upload " ++ serializeKey mk + +-- Downloads a git bundle to the annex objects directory, unless +-- the object file is already present. Returns the filename of the object +-- file. +-- +-- Throws errors if the download fails, or the checksum does not verify. +-- +-- This does not update the location log to indicate that the local +-- repository contains the git bundle object. Reasons not to include: +-- 1. When this is being used in a git clone, the repository will not have +-- a UUID yet. +-- 2. It would unncessarily bloat the git-annex branch, which would then +-- lead to more things needing to be pushed to the special remote, +-- and so more things pulled from it, etc. +-- 3. Git bundle objects are not usually transferred between repositories +-- except special remotes (although the user can if they want to). +downloadGitBundle :: Remote -> Key -> Annex FilePath +downloadGitBundle rmt k = getKeyExportLocations rmt k >>= \case + Nothing -> dlwith $ + download rmt k (AssociatedFile Nothing) stdRetry noNotification + Just locs -> dlwith $ + anyM getexport locs + where + dlwith a = ifM a + ( decodeBS <$> calcRepo (gitAnnexLocation k) + , giveup $ "Failed to download " ++ serializeKey k + ) + + getexport loc = catchNonAsync (getexport' loc) (const (pure False)) + getexport' loc = + getViaTmp rsp vc k (AssociatedFile Nothing) Nothing $ \tmp -> do + v <- Remote.retrieveExport (Remote.exportActions rmt) + k loc (decodeBS tmp) nullMeterUpdate + return (True, v) + rsp = Remote.retrievalSecurityPolicy rmt + vc = Remote.RemoteVerify rmt + +-- Uploads a bundle or manifest object from the annex objects directory +-- to the remote. +-- +-- Throws errors if the upload fails. +-- +-- This does not update the location log to indicate that the remote +-- contains the git object. +uploadGitObject :: Remote -> Key -> Annex () +uploadGitObject rmt k = getKeyExportLocations rmt k >>= \case + Just (loc:_) -> do + objfile <- fromRawFilePath <$> calcRepo (gitAnnexLocation k) + Remote.storeExport (Remote.exportActions rmt) objfile k loc nullMeterUpdate + _ -> + unlessM (upload rmt k (AssociatedFile Nothing) retry noNotification) $ + giveup $ "Failed to upload " ++ serializeKey k + where + retry = case fromKey keyVariety k of + GitBundleKey -> stdRetry + -- Manifest keys are not stable + _ -> noRetry + +-- Generates a git bundle, ingests it into the local objects directory, +-- and uploads its key to the special remote. +-- +-- If the key is already present in the provided manifest, avoids +-- uploading it. +-- +-- On failure, an exception is thrown, and nothing is added to the local +-- objects directory. +generateAndUploadGitBundle + :: Remote + -> [Git.Bundle.BundleSpec] + -> Manifest + -> Annex Key +generateAndUploadGitBundle rmt bs manifest = + withTmpFile "GITBUNDLE" $ \tmp tmph -> do + liftIO $ hClose tmph + inRepo $ Git.Bundle.create tmp bs + bundlekey <- genGitBundleKey (Remote.uuid rmt) + (toRawFilePath tmp) nullMeterUpdate + unless (bundlekey `elem` (inManifest manifest)) $ do + unlessM (moveAnnex bundlekey (AssociatedFile Nothing) (toRawFilePath tmp)) $ + giveup "Unable to push" + uploadGitObject rmt bundlekey + `onException` unlinkAnnex bundlekey + return bundlekey + +dropKey :: Remote -> Key -> Annex Bool +dropKey rmt k = tryNonAsync (dropKey' rmt k) >>= \case + Right () -> return True + Left ex -> do + liftIO $ hPutStrLn stderr $ + "Failed to drop " + ++ serializeKey k + ++ " (" ++ show ex ++ ")" + return False + +dropKey' :: Remote -> Key -> Annex () +dropKey' rmt k = getKeyExportLocations rmt k >>= \case + Nothing -> Remote.removeKey rmt k + Just locs -> forM_ locs $ \loc -> + Remote.removeExport (Remote.exportActions rmt) k loc + +getKeyExportLocations :: Remote -> Key -> Annex (Maybe [ExportLocation]) +getKeyExportLocations rmt k = do + cfg <- Annex.getGitConfig + u <- getUUID + return $ keyExportLocations rmt k cfg u + +-- When the remote contains a tree, the git keys are stored +-- inside the .git/annex/objects/ directory in the remote. +-- +-- The first ExportLocation in the returned list is the one that +-- is the same as the local repository would use. But it's possible +-- that one of the others in the list was used by another repository to +-- upload a git key. +keyExportLocations :: Remote -> Key -> GitConfig -> UUID -> Maybe [ExportLocation] +keyExportLocations rmt k cfg uuid + | exportTree (Remote.config rmt) || importTree (Remote.config rmt) = + Just $ map (\p -> mkExportLocation (".git" P. p)) $ + concatMap (`annexLocationsNonBare` k) cfgs + | otherwise = Nothing + where + -- When git-annex has not been initialized yet (eg, when cloning), + -- the Differences are unknown, so make a version of the GitConfig + -- with and without the OneLevelObjectHash difference. + cfgs + | uuid /= NoUUID = [cfg] + | hasDifference OneLevelObjectHash (annexDifferences cfg) = + [ cfg + , cfg { annexDifferences = mempty } + ] + | otherwise = + [ cfg + , cfg + { annexDifferences = mkDifferences + (S.singleton OneLevelObjectHash) + } + ] + +-- Tracking refs are used to remember the refs that are currently on the +-- remote. This is different from git's remote tracking branches, since it +-- needs to track all refs on the remote, not only the refs that the user +-- chooses to fetch. +-- +-- For refs/heads/master, the tracking ref is +-- refs/namespaces/git-remote-annex/uuid/refs/heads/master, +-- using the uuid of the remote. See gitnamespaces(7). +trackingRefPrefix :: Remote -> B.ByteString +trackingRefPrefix rmt = "refs/namespaces/git-remote-annex/" + <> fromUUID (Remote.uuid rmt) <> "/" + +toTrackingRef :: Remote -> Ref -> Ref +toTrackingRef rmt (Ref r) = Ref $ trackingRefPrefix rmt <> r + +-- If the ref is not a tracking ref, it is returned as-is. +fromTrackingRef :: Remote -> Ref -> Ref +fromTrackingRef rmt = Git.Ref.removeBase (decodeBS (trackingRefPrefix rmt)) + +-- Update the tracking refs to be those in the map. +-- When deleteold is set, any other tracking refs are deleted. +updateTrackingRefs :: Bool -> Remote -> M.Map Ref Sha -> Annex () +updateTrackingRefs deleteold rmt new = do + old <- inRepo $ Git.Ref.forEachRef + [Param (decodeBS (trackingRefPrefix rmt))] + + -- Delete all tracking refs that are not in the map. + when deleteold $ + forM_ (filter (\p -> M.notMember (fst p) new) old) $ \(s, r) -> + inRepo $ Git.Ref.delete s r + + -- Update all changed tracking refs. + let oldmap = M.fromList (map (\(s, r) -> (r, s)) old) + forM_ (M.toList new) $ \(r, s) -> + case M.lookup r oldmap of + Just s' | s' == s -> noop + _ -> inRepo $ Git.Branch.update' r s + +-- git clone does not bother to set GIT_WORK_TREE when running this +-- program, and it does not run it inside the new git repo either. +-- GIT_DIR is set to the new git directory. So, have to override +-- the worktree to be the parent of the gitdir. +getRepo :: IO Repo +getRepo = getEnv "GIT_WORK_TREE" >>= \case + Just _ -> Git.CurrentRepo.get + Nothing -> fixup <$> Git.CurrentRepo.get + where + fixup r@(Repo { location = loc@(Local { worktree = Just _ }) }) = + r { location = loc { worktree = Just (P.takeDirectory (gitdir loc)) } } + fixup r = r + +-- Records what the git-annex branch was at the beginning of this command. +data StartAnnexBranch + = AnnexBranchExistedAlready Sha + | AnnexBranchCreatedEmpty Sha + +{- Run early in the command, gets the initial state of the git-annex + - branch. + - + - If the branch does not exist yet, it's created here. This is done + - because it's hard to avoid the branch being created by this command, + - so tracking the sha of the created branch allows cleaning it up later. + -} +startAnnexBranch :: Annex StartAnnexBranch +startAnnexBranch = ifM (null <$> Annex.Branch.siblingBranches) + ( AnnexBranchCreatedEmpty <$> Annex.Branch.getBranch + , AnnexBranchExistedAlready <$> Annex.Branch.getBranch + ) + +-- This runs an action that will set up a special remote that +-- was specified using an annex url. +-- +-- Setting up a special remote needs to write its config to the git-annex +-- branch. And using a special remote may also write to the branch. +-- But in this case, writes to the git-annex branch need to be avoided, +-- so that cleanupInitialization can leave things in the right state. +-- +-- So this prevents commits to the git-annex branch, and redirects all +-- journal writes to a temporary directory, so that all writes +-- to the git-annex branch by the action will be discarded. +specialRemoteFromUrl :: StartAnnexBranch -> Annex a -> Annex a +specialRemoteFromUrl sab a = withTmpDir "journal" $ \tmpdir -> do + Annex.overrideGitConfig $ \c -> + c { annexAlwaysCommit = False } + Annex.BranchState.changeState $ \st -> + st { alternateJournal = Just (toRawFilePath tmpdir) } + a `finally` cleanupInitialization sab + +-- If the git-annex branch did not exist when this command started, +-- it was created empty by this command, and this command has avoided +-- making any other commits to it. If nothing else has written to the +-- branch while this command was running, the branch will be deleted. +-- That allows for the git-annex branch that is fetched from the special +-- remote to contain Differences, which would prevent it from being merged +-- with the git-annex branch created by this command. +-- +-- If there is still not a sibling git-annex branch, this deletes all annex +-- objects for git bundles from the annex objects directory, and deletes +-- the annex objects directory. That is necessary to avoid the +-- Annex.Init.objectDirNotPresent check preventing a later initialization. +-- And if the later initialization includes Differences, the git bundle +-- objects downloaded by this process would be in the wrong locations. +-- +-- When there is now a sibling git-annex branch, this handles +-- initialization. When the initialized git-annex branch has Differences, +-- the git bundle objects are in the wrong place, so have to be deleted. +cleanupInitialization :: StartAnnexBranch -> Annex () +cleanupInitialization sab = do + case sab of + AnnexBranchExistedAlready _ -> noop + AnnexBranchCreatedEmpty r -> + whenM ((r ==) <$> Annex.Branch.getBranch) $ do + inRepo $ Git.Branch.delete Annex.Branch.fullname + indexfile <- fromRepo gitAnnexIndex + liftIO $ removeWhenExistsWith R.removeLink indexfile + ifM Annex.Branch.hasSibling + ( do + autoInitialize' (pure True) remoteList + differences <- allDifferences <$> recordedDifferences + when (differences /= mempty) $ + deletebundleobjects + , deletebundleobjects + ) + where + deletebundleobjects = do + annexobjectdir <- fromRepo gitAnnexObjectDir + ks <- listKeys InAnnex + forM_ ks $ \k -> case fromKey keyVariety k of + GitBundleKey -> lockContentForRemoval k noop removeAnnex + _ -> noop + void $ liftIO $ tryIO $ removeDirectory (decodeBS annexobjectdir) diff --git a/CmdLine/GitRemoteTorAnnex.hs b/CmdLine/GitRemoteTorAnnex.hs index 3349304fb2..c3aa13c3f0 100644 --- a/CmdLine/GitRemoteTorAnnex.hs +++ b/CmdLine/GitRemoteTorAnnex.hs @@ -25,7 +25,7 @@ run (_remotename:address:[]) = forever $ "capabilities" -> putStrLn "connect" >> ready "connect git-upload-pack" -> go UploadPack "connect git-receive-pack" -> go ReceivePack - l -> giveup $ "git-remote-helpers protocol error at " ++ show l + l -> giveup $ "gitremote-helpers protocol error at " ++ show l where (onionaddress, onionport) | '/' `elem` address = parseAddressPort $ diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 2212c83007..f08d09f89f 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -38,6 +38,7 @@ import Utility.CopyFile import Git.FilePath import Utility.PID import Utility.InodeCache +import Utility.Metered import Annex.InodeSentinal import qualified Database.Keys import qualified Database.Fsck as FsckDb @@ -206,8 +207,7 @@ performRemote key afile numcopies remote = ) , return Nothing ) - getfile' tmp = Remote.retrieveKeyFile remote key (AssociatedFile Nothing) (fromRawFilePath tmp) dummymeter (RemoteVerify remote) - dummymeter _ = noop + getfile' tmp = Remote.retrieveKeyFile remote key (AssociatedFile Nothing) (fromRawFilePath tmp) nullMeterUpdate (RemoteVerify remote) getcheap tmp = case Remote.retrieveKeyFileCheap remote of Just a -> isRight <$> tryNonAsync (a key afile (fromRawFilePath tmp)) Nothing -> return False diff --git a/Command/Unused.hs b/Command/Unused.hs index eebe24ca36..75cf94a3e2 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010-2016 Joey Hess + - Copyright 2010-2024 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -34,6 +34,7 @@ import Logs.View (is_branchView) import Annex.BloomFilter import qualified Database.Keys import Annex.InodeSentinal +import Backend.GitRemoteAnnex (isGitRemoteAnnexKey) import qualified Data.Map as M import qualified Data.ByteString as S @@ -104,7 +105,8 @@ checkRemoteUnused remotename refspec = go =<< Remote.nameToUUID remotename _ <- check "" (remoteUnusedMsg r remotename) (remoteunused u) 0 next $ return True remoteunused u = loggedKeysFor u >>= \case - Just ks -> excludeReferenced refspec ks + Just ks -> filter (not . isGitRemoteAnnexKey u) + <$> excludeReferenced refspec ks Nothing -> giveup "This repository is read-only." check :: String -> ([(Int, Key)] -> String) -> Annex [Key] -> Int -> Annex Int diff --git a/Git/Bundle.hs b/Git/Bundle.hs new file mode 100644 index 0000000000..caa4d12ec9 --- /dev/null +++ b/Git/Bundle.hs @@ -0,0 +1,68 @@ +{- git bundles + - + - Copyright 2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Git.Bundle where + +import Common +import Git +import Git.Command + +import Data.Char (ord) +import qualified Data.ByteString.Char8 as S8 + +listHeads :: FilePath -> Repo -> IO [(Sha, Ref)] +listHeads bundle repo = map gen . S8.lines <$> + pipeReadStrict [Param "bundle", Param "list-heads", File bundle] repo + where + gen l = let (s, r) = separate' (== fromIntegral (ord ' ')) l + in (Ref s, Ref r) + +unbundle :: FilePath -> Repo -> IO () +unbundle bundle = runQuiet [Param "bundle", Param "unbundle", File bundle] + +-- Specifies what to include in the bundle. +data BundleSpec = BundleSpec + { preRequisiteRef :: Maybe Ref + -- ^ Do not include this Ref, or any objects reachable from it + -- in the bundle. This should be an ancestor of the includeRef. + , includeRef :: Ref + -- ^ Include this Ref and objects reachable from it in the bundle, + -- unless filtered out by the preRequisiteRef of this BundleSpec + -- or any other one that is included in the bundle. + } + deriving (Show) + +-- Include the ref and all objects reachable from it in the bundle. +-- (Unless another BundleSpec is included that has a preRequisiteRef +-- that filters out the ref or other objects.) +fullBundleSpec :: Ref -> BundleSpec +fullBundleSpec r = BundleSpec + { preRequisiteRef = Nothing + , includeRef = r + } + +create :: FilePath -> [BundleSpec] -> Repo -> IO () +create bundle revs repo = pipeWrite + [ Param "bundle" + , Param "create" + , Param "--quiet" + , File bundle + , Param "--stdin" + ] repo writer + where + writer h = do + forM_ revs $ \bs -> + case preRequisiteRef bs of + Nothing -> S8.hPutStrLn h $ + fromRef' (includeRef bs) + Just pr -> S8.hPutStrLn h $ + fromRef' pr + <> ".." <> + fromRef' (includeRef bs) + hClose h diff --git a/Git/Ref.hs b/Git/Ref.hs index 2d2874a7ef..2767ae339c 100644 --- a/Git/Ref.hs +++ b/Git/Ref.hs @@ -1,6 +1,6 @@ {- git ref stuff - - - Copyright 2011-2020 Joey Hess + - Copyright 2011-2024 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -165,8 +165,18 @@ matchingUniq refs repo = nubBy uniqref <$> matching refs repo list :: Repo -> IO [(Sha, Ref)] list = matching' [] [] -{- Deletes a ref. This can delete refs that are not branches, - - which git branch --delete refuses to delete. -} +{- Lists refs using for-each-ref. -} +forEachRef :: [CommandParam] -> Repo -> IO [(Sha, Branch)] +forEachRef ps repo = map gen . S8.lines <$> + pipeReadStrict (Param "for-each-ref" : ps ++ [format]) repo + where + format = Param "--format=%(objectname) %(refname)" + gen l = let (r, b) = separate' (== fromIntegral (ord ' ')) l + in (Ref r, Ref b) + +{- Deletes a ref when it contains the specified sha. + - This can delete refs that are not branches, which + - git branch --delete refuses to delete. -} delete :: Sha -> Ref -> Repo -> IO () delete oldvalue ref = run [ Param "update-ref" @@ -175,6 +185,14 @@ delete oldvalue ref = run , Param $ fromRef oldvalue ] +{- Deletes a ref no matter what it contains. -} +delete' :: Ref -> Repo -> IO () +delete' ref = run + [ Param "update-ref" + , Param "-d" + , Param $ fromRef ref + ] + {- Gets the sha of the tree a ref uses. - - The ref may be something like a branch name, and it could contain @@ -192,6 +210,19 @@ tree (Ref ref) = extractSha <$$> pipeReadStrict -- de-reference commit objects to the tree else ref <> ":" +{- Check if the first ref is an ancestor of the second ref. + - + - Note that if the two refs point to the same commit, it is considered + - to be an ancestor of itself. + -} +isAncestor :: Ref -> Ref -> Repo -> IO Bool +isAncestor r1 r2 = runBool + [ Param "merge-base" + , Param "--is-ancestor" + , Param (fromRef r1) + , Param (fromRef r2) + ] + {- Checks if a String is a legal git ref name. - - The rules for this are complex; see git-check-ref-format(1) -} diff --git a/Git/Remote.hs b/Git/Remote.hs index 9cdaad61ca..4eb6780fcc 100644 --- a/Git/Remote.hs +++ b/Git/Remote.hs @@ -1,6 +1,6 @@ {- git remote stuff - - - Copyright 2012-2021 Joey Hess + - Copyright 2012-2024 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -13,6 +13,7 @@ module Git.Remote where import Common import Git import Git.Types +import Git.Command import Data.Char import qualified Data.Map as M @@ -23,6 +24,11 @@ import Network.URI import Git.FilePath #endif +{- Lists all currently existing git remotes. -} +listRemotes :: Repo -> IO [RemoteName] +listRemotes repo = map decodeBS . S8.lines + <$> pipeReadStrict [Param "remote"] repo + {- Is a git config key one that specifies the url of a remote? -} isRemoteUrlKey :: ConfigKey -> Bool isRemoteUrlKey = isRemoteKey "url" diff --git a/Makefile b/Makefile index 06ebebdea3..fc64201f72 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -all=git-annex git-annex-shell mans docs +all=git-annex git-annex-shell git-remote-annex mans docs # set to "./Setup" if you lack a cabal program. Or can be set to "stack" BUILDER?=cabal @@ -70,6 +70,9 @@ git-annex: tmp/configure-stamp git-annex-shell: git-annex ln -sf git-annex git-annex-shell +git-remote-annex: git-annex + ln -sf git-annex git-remote-annex + # These are not built normally. git-union-merge.1: doc/git-union-merge.mdwn ./Build/mdwn2man git-union-merge 1 doc/git-union-merge.mdwn > git-union-merge.1 @@ -90,6 +93,7 @@ install-bins: build install -d $(DESTDIR)$(PREFIX)/bin install git-annex $(DESTDIR)$(PREFIX)/bin ln -sf git-annex $(DESTDIR)$(PREFIX)/bin/git-annex-shell + ln -sf git-annex $(DESTDIR)$(PREFIX)/bin/git-remote-annex ln -sf git-annex $(DESTDIR)$(PREFIX)/bin/git-remote-tor-annex install-desktop: build Build/InstallDesktopFile @@ -141,7 +145,7 @@ clean: doc/.ikiwiki html dist tags Build/SysConfig Build/Version \ Setup Build/InstallDesktopFile Build/Standalone \ Build/DistributionUpdate Build/BuildVersion Build/MakeMans \ - git-annex-shell git-union-merge .tasty-rerun-log + git-annex-shell git-remote-annex git-union-merge .tasty-rerun-log find . -name \*.o -exec rm {} \; find . -name \*.hi -exec rm {} \; diff --git a/Remote/Git.hs b/Remote/Git.hs index bba505e378..a234fd0fbb 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -92,7 +92,7 @@ list :: Bool -> Annex [Git.Repo] list autoinit = do c <- fromRepo Git.config rs <- mapM (tweakurl c) =<< Annex.getGitRemotes - mapM (configRead autoinit) rs + mapM (configRead autoinit) (filter (not . isGitRemoteAnnex) rs) where annexurl r = remoteConfig r "annexurl" tweakurl c r = do @@ -103,6 +103,9 @@ list autoinit = do Git.Construct.remoteNamed n $ Git.Construct.fromRemoteLocation (Git.fromConfigValue url) False g +isGitRemoteAnnex :: Git.Repo -> Bool +isGitRemoteAnnex r = "annex::" `isPrefixOf` Git.repoLocation r + {- Git remotes are normally set up using standard git commands, not - git-annex initremote and enableremote. - diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index 884d53d7bf..9f4bd7fcb1 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -14,6 +14,7 @@ module Remote.Helper.Encryptable ( encryptionAlreadySetup, encryptionConfigParsers, parseEncryptionConfig, + parseEncryptionMethod, remoteCipher, remoteCipher', embedCreds, @@ -85,7 +86,7 @@ encryptionFieldParser :: RemoteConfigFieldParser encryptionFieldParser = RemoteConfigFieldParser { parserForField = encryptionField , valueParser = \v c -> Just . RemoteConfigValue - <$> parseEncryptionMethod (fmap fromProposedAccepted v) c + <$> parseEncryptionMethod' v c , fieldDesc = FieldDesc "how to encrypt data stored in the special remote" , valueDesc = Just $ ValueDesc $ intercalate " or " (M.keys encryptionMethods) @@ -100,14 +101,18 @@ encryptionMethods = M.fromList , ("sharedpubkey", SharedPubKeyEncryption) ] -parseEncryptionMethod :: Maybe String -> RemoteConfig -> Either String EncryptionMethod -parseEncryptionMethod (Just s) _ = case M.lookup s encryptionMethods of - Just em -> Right em - Nothing -> Left badEncryptionMethod +parseEncryptionMethod :: RemoteConfig -> Either String EncryptionMethod +parseEncryptionMethod c = parseEncryptionMethod' (M.lookup encryptionField c) c + +parseEncryptionMethod' :: Maybe (ProposedAccepted String) -> RemoteConfig -> Either String EncryptionMethod +parseEncryptionMethod' (Just s) _ = + case M.lookup (fromProposedAccepted s) encryptionMethods of + Just em -> Right em + Nothing -> Left badEncryptionMethod -- Hybrid encryption is the default when a keyid is specified without -- an encryption field, or when there's a cipher already but no encryption -- field. -parseEncryptionMethod Nothing c +parseEncryptionMethod' Nothing c | M.member (Accepted "keyid") c || M.member cipherField c = Right HybridEncryption | otherwise = Left badEncryptionMethod @@ -162,7 +167,7 @@ encryptionSetup c gc = do maybe (genCipher pc gpgcmd) (updateCipher pc gpgcmd) (extractCipher pc) where -- The type of encryption - encryption = parseEncryptionMethod (fromProposedAccepted <$> M.lookup encryptionField c) c + encryption = parseEncryptionMethod c -- Generate a new cipher, depending on the chosen encryption scheme genCipher pc gpgcmd = case encryption of Right NoneEncryption -> return (c, NoEncryption) diff --git a/Types/BranchState.hs b/Types/BranchState.hs index 129a17b349..d79a1c70a6 100644 --- a/Types/BranchState.hs +++ b/Types/BranchState.hs @@ -36,7 +36,10 @@ data BranchState = BranchState -- process need to be noticed while the current process is running? -- (This makes the journal always be read, and avoids using the -- cache.) + , alternateJournal :: Maybe RawFilePath + -- ^ use this directory for all journals, rather than the + -- gitAnnexJournalDir and gitAnnexPrivateJournalDir. } startBranchState :: BranchState -startBranchState = BranchState False False False [] [] [] False +startBranchState = BranchState False False False [] [] [] False Nothing diff --git a/Types/Difference.hs b/Types/Difference.hs index 0617dc22a1..93a175c76c 100644 --- a/Types/Difference.hs +++ b/Types/Difference.hs @@ -17,6 +17,7 @@ module Types.Difference ( differenceConfigVal, hasDifference, listDifferences, + mkDifferences, ) where import Utility.PartialPrelude diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index 26540b8484..b24ae48eb8 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -373,6 +373,8 @@ data RemoteGitConfig = RemoteGitConfig , remoteAnnexBwLimitDownload :: Maybe BwRate , remoteAnnexAllowUnverifiedDownloads :: Bool , remoteAnnexConfigUUID :: Maybe UUID + , remoteAnnexMaxGitBundles :: Int + , remoteAnnexAllowEncryptedGitRepo :: Bool {- These settings are specific to particular types of remotes - including special remotes. -} @@ -430,7 +432,8 @@ extractRemoteGitConfig r remotename = do , remoteAnnexTrustLevel = notempty $ getmaybe "trustlevel" , remoteAnnexStartCommand = notempty $ getmaybe "start-command" , remoteAnnexStopCommand = notempty $ getmaybe "stop-command" - , remoteAnnexSpeculatePresent = getbool "speculate-present" False + , remoteAnnexSpeculatePresent = + getbool "speculate-present" False , remoteAnnexBare = getmaybebool "bare" , remoteAnnexRetry = getmayberead "retry" , remoteAnnexForwardRetry = getmayberead "forward-retry" @@ -476,6 +479,10 @@ extractRemoteGitConfig r remotename = do , remoteAnnexDdarRepo = getmaybe "ddarrepo" , remoteAnnexHookType = notempty $ getmaybe "hooktype" , remoteAnnexExternalType = notempty $ getmaybe "externaltype" + , remoteAnnexMaxGitBundles = + fromMaybe 100 (getmayberead "max-git-bundles") + , remoteAnnexAllowEncryptedGitRepo = + getbool "allow-encrypted-gitrepo" False } where getbool k d = fromMaybe d $ getmaybebool k diff --git a/Types/GitRemoteAnnex.hs b/Types/GitRemoteAnnex.hs new file mode 100644 index 0000000000..8dae944e59 --- /dev/null +++ b/Types/GitRemoteAnnex.hs @@ -0,0 +1,44 @@ +{- git-remote-annex types + - + - Copyright 2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Types.GitRemoteAnnex + ( Manifest + , mkManifest + , inManifest + , outManifest + ) where + +import Types.Key + +import qualified Data.Semigroup as Sem + +-- The manifest contains an ordered list of git bundle keys. +-- +-- There is a second list of git bundle keys that are no longer +-- used and should be deleted. This list should never contain keys +-- that are in the first list. +data Manifest = + Manifest + { inManifest :: [Key] + , outManifest :: [Key] + } + deriving (Show) + +-- Smart constructor for Manifest. Preserves outManifest invariant. +mkManifest + :: [Key] -- ^ inManifest + -> [Key] -- ^ outManifest + -> Manifest +mkManifest inks outks = Manifest inks (filter (`notElem` inks) outks) + +instance Monoid Manifest where + mempty = Manifest [] [] + +instance Sem.Semigroup Manifest where + a <> b = mkManifest + (inManifest a <> inManifest b) + (outManifest a <> outManifest b) diff --git a/Types/Key.hs b/Types/Key.hs index 2aeb7613a9..2d901c0af7 100644 --- a/Types/Key.hs +++ b/Types/Key.hs @@ -219,6 +219,8 @@ data KeyVariety | WORMKey | URLKey | VURLKey + | GitBundleKey + | GitManifestKey -- A key that is handled by some external backend. | ExternalKey S.ByteString HasExt -- Some repositories may contain keys of other varieties, @@ -253,6 +255,8 @@ hasExt (MD5Key (HasExt b)) = b hasExt WORMKey = False hasExt URLKey = False hasExt VURLKey = False +hasExt GitBundleKey = False +hasExt GitManifestKey = False hasExt (ExternalKey _ (HasExt b)) = b hasExt (OtherKey s) = (snd <$> S8.unsnoc s) == Just 'E' @@ -282,6 +286,8 @@ formatKeyVariety v = case v of WORMKey -> "WORM" URLKey -> "URL" VURLKey -> "VURL" + GitBundleKey -> "GITBUNDLE" + GitManifestKey -> "GITMANIFEST" ExternalKey s e -> adde e ("X" <> s) OtherKey s -> s where @@ -347,6 +353,7 @@ parseKeyVariety "MD5E" = MD5Key (HasExt True) parseKeyVariety "WORM" = WORMKey parseKeyVariety "URL" = URLKey parseKeyVariety "VURL" = VURLKey +parseKeyVariety "GITBUNDLE" = GitBundleKey parseKeyVariety b | "X" `S.isPrefixOf` b = let b' = S.tail b diff --git a/doc/backends.mdwn b/doc/backends.mdwn index da37597902..d24fe0e654 100644 --- a/doc/backends.mdwn +++ b/doc/backends.mdwn @@ -79,10 +79,6 @@ content of an annexed file remains unchanged. passing it to a shell script. These types of keys are distinct from URLs/URIs that may be attached to a key (using any backend) indicating the key's location on the web or in one of [[special_remotes]]. -* `GIT` -- This is used internally by git-annex when exporting trees - containing files stored in git, rather than git-annex. It represents a - git sha. This is never used for git-annex links, but information about - keys of this type is stored in the git-annex branch. ## external backends @@ -100,6 +96,19 @@ Like with git-annex's builtin backends, you can add "E" to the end of the name of an external backend, to get a version that includes the file extension in the key. +## internal use backends + +Keys using these backends can sometimes be visible, but they are used by +git-annex for its own purposes, and not for your annexed files. + +* `GIT` -- This is used internally by git-annex when exporting trees + containing files stored in git, rather than git-annex. It represents a + git sha. This is never used for git-annex links, but information about + keys of this type is stored in the git-annex branch. +* `GITBUNDLE` and `GITMANIFEST` -- Used by [[git-remote-annex]] to store + a git repository in a special remote. See + [[this_page|internals/git-remote-annex]] for details about these. + ## notes If you want to be able to prove that you're working with the same file diff --git a/doc/future_proofing.mdwn b/doc/future_proofing.mdwn index 369aa7d890..84883d060f 100644 --- a/doc/future_proofing.mdwn +++ b/doc/future_proofing.mdwn @@ -49,5 +49,5 @@ problem: [[fairly simple shell script using standard tools|tips/Decrypting_files_in_special_remotes_without_git-annex]] (gpg and openssl) can decrypt files stored on such a remote, as long as you have access to the encryption keys (which - are stored in the git-annex branch of the repository, sometimes - encrypted with your gpg key). + for some types of encryption are stored in the git-annex branch of + the repository, sometimes encrypted with your gpg key). diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 59dacb0229..bb0a6172cf 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1634,10 +1634,6 @@ Remotes are configured using these settings in `.git/config`. configured by the trust and untrust commands. The value can be any of "trusted", "semitrusted" or "untrusted". -* `remote..annex-availability` - - This configuration setting is no longer used. - * `remote..annex-speculate-present` Set to "true" to make git-annex speculate that this remote may contain the @@ -1652,11 +1648,36 @@ Remotes are configured using these settings in `.git/config`. remotes, and is set when using [[git-annex-initremote]](1) with the `--private` option. +* `remote..annex-max-git-bundles`, `annex.max-git-bundles` + + When using [[git-remote-annex]] to store a git repository in a special + remote, this configures how many separate git bundle objects to store + in the special remote before re-uploading a single git bundle that contains + the entire git repository. + + The default is 100, which aims to avoid often needing to often re-upload, + while preventing a new clone needing to download too many objects. Set to + 0 to disable re-uploading. + +* `remote..annex-allow-encrypted-gitrepo` + + Setting this to true allows using [[git-remote-annex]] to push the git + repository to an encrypted special remote. + + That is not allowed by default, because it is impossible to git clone + from an encrypted special remote, since it needs encryption keys stored + in the remote. So take care that, if you set this, you don't rely + on the encrypted special remote being the only copy of your git repository. + * `remote..annex-bare` Can be used to tell git-annex if a remote is a bare repository or not. Normally, git-annex determines this automatically. +* `remote..annex-availability` + + This configuration setting is no longer used. + * `remote..annex-ssh-options` Options to use when using ssh to talk to this remote. @@ -2194,6 +2215,10 @@ More git-annex documentation is available on its web site, If git-annex is installed from a package, a copy of its documentation should be included, in, for example, `/usr/share/doc/git-annex/`. +* [[git-annex-shell]](1) +* [[git-remote-annex]](1) +* [[git-remote-tor-annex]](1) + # AUTHOR Joey Hess diff --git a/doc/git-remote-annex.mdwn b/doc/git-remote-annex.mdwn new file mode 100644 index 0000000000..52d9a11ccb --- /dev/null +++ b/doc/git-remote-annex.mdwn @@ -0,0 +1,71 @@ +# NAME + +git-remote-annex - remote helper program to store a git repository in a git-annex special remote + +# SYNOPSIS + +git fetch annex::uuid?param=value¶m=value... + +# DESCRIPTION + +This is a git remote helper program that allows git to clone, +pull and push from a git repository that is stored in a git-annex +special remote. + +The format of the remote URL is "annex::" followed by the UUID of the +special remote, and then followed by all of the configuration parameters of +the special remote. + +For example, to clone from a directory special remote: + + git clone annex::358ff77e-0bc3-11ef-bc49-872e6695c0e3?type=directory&encryption=none&directory=/mnt/foo/ + +When configuring the url of an existing special remote, a +shorter url of "annex::" is sufficient. For example: + + git-annex initremote foo type=directory encryption=none directory=/mnt/foo + git config remote.foo.url annex:: + git push foo master + +Configuring the url like that is automatically done when cloning from a +special remote, but not by [[git-annex-initremote]](1) and +[[git-annex-enableremote]](1). + +When a special remote needs some additional credentials to be provided, +they are not included in the URL, and need to be provided when cloning from +the special remote. That is typically done by setting environment +variables. Some special remotes may also need environment variables to be +set when pulling or pushing. + +The git repository is stored in the special remote using special annex objects +with names starting with "GITMANIFEST--" and "GITBUNDLE--". For details about +how the git repository is stored, see + + +Pushes to a special remote are usually done incrementally. However, +sometimes the whole git repository (but not the annex) needs to be +re-uploaded. That is done when deleting a ref from the remote. It's also +done when too many git bundles accumulate in the special remote, as +configured by the `remote..annex-max-git-bundles` git config. + +Like any git repository, a git repository stored on a special remote can +have conflicting things pushed to it from different places. This mostly +works the same as any other git repository, eg a push that overwrites other +work will be prevented unless forced. However, it is possible, when +conflicting pushes are being done at the same time, for one of the pushes +to be overwritten by the other one. In this sitiation, the push will appear +to have succeeded, but pulling later will show the true situation. + +# SEE ALSO + +gitremote-helpers(1) + +[[git-annex]](1) + +[[git-annex-initremote]](1) + +# AUTHOR + +Joey Hess + +Warning: Automatically converted into a man page by mdwn2man. Edit with care. diff --git a/doc/git-remote-tor-annex.mdwn b/doc/git-remote-tor-annex.mdwn index 4e41de877b..e32b711e4c 100644 --- a/doc/git-remote-tor-annex.mdwn +++ b/doc/git-remote-tor-annex.mdwn @@ -21,7 +21,7 @@ service, its first line is used as the authtoken. # SEE ALSO -git-remote-helpers(1) +gitremote-helpers(1) [[git-annex]](1) diff --git a/doc/internals.mdwn b/doc/internals.mdwn index 09225312fc..f0ee6f66c1 100644 --- a/doc/internals.mdwn +++ b/doc/internals.mdwn @@ -2,6 +2,8 @@ In the world of git, we're not scared about internal implementation details, and sometimes we like to dive in and tweak things by hand. Here's some documentation to that end. +[[!toc ]] + ## The .git/ directory ### `.git/annex/objects/aa/bb/*/*` @@ -364,3 +366,8 @@ of actual annexed files. These trees are recorded in history of the git-annex branch, but the head of the git-annex branch will never contain them. + +## Other internals documentation + +* [[git-remote-annex]] documents how git repositories are stored + on special remotes when using git with "annex::" urls. diff --git a/doc/internals/git-remote-annex.mdwn b/doc/internals/git-remote-annex.mdwn new file mode 100644 index 0000000000..8fff1eff4e --- /dev/null +++ b/doc/internals/git-remote-annex.mdwn @@ -0,0 +1,48 @@ +The [[git-remote-annex|/git-remote-annex]] command allows pushing a git +repository to a special remote, and later cloning from it. + +This adds two new key types to git-annex, GITMANIFEST and a GITBUNDLE. + +GITMANIFEST--$UUID is the manifest for a git repository stored in the +git-annex repository with that UUID. + +GITBUNDLE--$UUID-sha256 is a git bundle. + +# format of the manifest file + +An ordered list of bundle keys, one per line. + +Additionally, there may be bundle keys that are prefixed with "-". +These keys are not part of the current content of the git remote +and are in the process of being deleted. + +(Lines end with unix `"\n"`, not `"\r\n"`.) + +# exporttree=yes remotes + +In an exporttree=yes remote, the GITMANIFEST and GITBUNDLE objects are +stored in the remote, under the `.git/annex/objects/` path. + +# multiple GITMANIFEST files + +Usually there will only be one per special remote, but it's possible for +multiple special remotes to point to the same object storage, and if so +multiple GITMANIFEST objects can be stored. + +This is why the UUID of the special remote is included in the GITMANIFEST +key, and in the annex:: uri. + +# manually cloning from these files + +If you are unable to use git-annex and need to clone a git repository +stored in such a special remote, this procedure will work: + +* Find and download the GITMANIFEST +* Download each listed GITBUNDLE +* `git fetch` from each new bundle in order. + (Note that later bundles can update refs from the versions in previous + bundles.) + +When the special remote is encrypted, the GITMANIFEST and GITBUNDLE will +also be encrypted. To decrypt those manually, see this +[[fairly simple shell script using standard tools|tips/Decrypting_files_in_special_remotes_without_git-annex]]. diff --git a/doc/special_remotes.mdwn b/doc/special_remotes.mdwn index 7399ba34a8..04f2feb9c6 100644 --- a/doc/special_remotes.mdwn +++ b/doc/special_remotes.mdwn @@ -5,8 +5,7 @@ directory. But, git-annex also extends git's concept of remotes, with these special types of remotes. These can be used by git-annex to store and retrieve -the content of files. They cannot be used by other git commands, and -the git history is not stored in them. +the content of files. * [[adb]] (for Android devices) * [[Amazon_Glacier|glacier]] @@ -94,15 +93,25 @@ To initialize a new special remote, use the special remote you want to use for details about configuration and examples of how to initremote it. -Once a special remote has been initialize, other clones of the repository can +Once a special remote has been initialized, other clones of the repository can also enable it, by using [[git-annex enableremote|git-annex-enableremote]] with the same name that was used to initialize it. (Run the command without any name to get a list of available special remotes.) Initializing or enabling a special remote adds it as a remote of your git -repository. You can't use git commands like `git pull` with the remote -(usually, there are exceptions like [[git-lfs]]), but you can use git-annex -commands. +repository. + +## Storing a git repository in a special remote + +Most special remotes do not include a clone of the git repository +by default, so you can't use commands like `git push` and `git pull` +with them. (There are some exceptions like [[git-lfs]].) + +But it is possible to store a git repository in many special remotes, +using the [[git-remote-annex]] command. This involves configuring +the remote with an "annex::" url. It's even possible to `git clone` +from a special remote using such an url. See the documentation of +[[git-remote-annex]] for details. ## Unused content on special remotes diff --git a/doc/todo/git-remote-annex.mdwn b/doc/todo/git-remote-annex.mdwn index 05ea923975..2d46d5701c 100644 --- a/doc/todo/git-remote-annex.mdwn +++ b/doc/todo/git-remote-annex.mdwn @@ -1,24 +1,70 @@ -git-remote-annex will be a program that allows push/pull of a git -repository to any git-annex special remote. +git-remote-annex will be a program that allows push/pull/clone of a git +repository to many types of git-annex special remote. This is a redesign and reimplementation of git-remote-datalad-annex. It will be a safer implementation, will support incremental pushes, and will be available to users who don't use datalad. - -Work is in the `git-remote-annex` branch, currently we have a design for -the core data files and operations. - - -Also, that branch has a proof of concept implementation in a shell script. -Though it doesn't yet use special remotes at all, it is able to do -incremental pushes to git bundles with a manifest. - -I still need to do some design work around using the git-annex branch to -detect concurrent push situations where changes to the manifest get lost, -and re-add those changes to it later. - -Also, it's not clear what will happen when two people make conflicting pushes -to a ref, the goal would be to replicate git push to a regular git remote, -but that may not be entirely possible. This will need to be investigated -further. --[[Joey]] + +--- + +This is implememented and working. Remaining todo list for it: + +* Test incremental push edge cases involving checkprereq. + +* Cloning from an annex:: url with importtree=yes doesn't work + (with or without exporttree=yes). This is because the ContentIdentifier + db is not populated. + +* Improve recovery from interrupted push by using outManifest to clean up + after it. (Requires populating outManifest.) + +* See XXX in uploadManifest about recovering from a situation + where the remote is left with a deleted manifest when a push + is interrupted part way through. This should be recoverable + by caching the manifest locally and re-uploading it when + the remote has no manifest or prompting the user to merge and re-push. + +* It would be nice if git-annex could generate an annex:: url + for a special remote and show it to the user, eg when + they have set the shorthand "annex::" url, so they know the full url. + `git-annex info $remote` could also display it. + Currently, the user has to remember how the special remote was + configured and replicate it all in the url. + + There are some difficulties to doing this, including that + RemoteConfig can have hidden fields that should be omitted. + +* initremote/enableremote could have an option that configures the url to a + special remote to a annex:: url. This would make it easier to use + git-remote-annex, since the user would not need to set up the url + themselves. (Also it would then avoid setting `skipFetchAll = true`) + +* datalad-annex supports cloning from the web special remote, + using an url that contains the result of pushing to eg, a directory + special remote. + `datalad-annex::https://example.com?type=web&url={noquery}` + Supporting something like this would be good. + +* Improve behavior in push races. A race can overwrite a change + to the MANIFEST and lose work that was pushed from the other repo. + From the user's perspective, that situation is the same as if one repo + pushed new work, then the other repo did a git push --force, overwriting + the first repo's push. In the first repo, another push will then fail as + a non fast-forward, and the user can recover as usual. This is probably + okish. + + But.. a MANIFEST overwrite will leave bundle files in the remote that + are not listed in the MANIFEST. It seems likely that git-annex could + detect that after the fact and clean it up. Eg, if it caches + the last MANIFEST it uploaded, next time it downloads the MANIFEST + it can check if there are bundle files in the old one that are not + in the new one. If so, it can drop those bundle files from the remote. + +* A push race can also appear to the user as if they pushed a ref, but then + it got deleted from the remote. This happens when two pushes are + pushing different ref names. This might be harder for the user to + notice; git fetch does not indicate that a remote ref got deleted. + They would have to use git fetch --prune to notice the deletion. + Once the user does notice, they can re-push their ref to recover. + Can this be improved? diff --git a/git-annex.cabal b/git-annex.cabal index 2098cd556d..5540828726 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -580,6 +580,7 @@ Executable git-annex Author Backend Backend.External + Backend.GitRemoteAnnex Backend.Hash Backend.URL Backend.Utilities @@ -606,6 +607,7 @@ Executable git-annex CmdLine.GitAnnexShell.Fields CmdLine.AnnexSetter CmdLine.Option + CmdLine.GitRemoteAnnex CmdLine.GitRemoteTorAnnex CmdLine.Seek CmdLine.Usage @@ -758,6 +760,7 @@ Executable git-annex Git.AutoCorrect Git.Branch Git.BuildVersion + Git.Bundle Git.CatFile Git.CheckAttr Git.CheckIgnore @@ -939,6 +942,7 @@ Executable git-annex Types.Export Types.FileMatcher Types.GitConfig + Types.GitRemoteAnnex Types.Group Types.Import Types.IndexFiles diff --git a/git-annex.hs b/git-annex.hs index 89a9350b40..88117b4508 100644 --- a/git-annex.hs +++ b/git-annex.hs @@ -1,6 +1,6 @@ {- git-annex main program dispatch - - - Copyright 2010-2016 Joey Hess + - Copyright 2010-2024 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -13,6 +13,7 @@ import Network.Socket (withSocketsDo) import qualified CmdLine.GitAnnex import qualified CmdLine.GitAnnexShell +import qualified CmdLine.GitRemoteAnnex import qualified CmdLine.GitRemoteTorAnnex import qualified Test import qualified Benchmark @@ -35,6 +36,7 @@ main = sanitizeTopLevelExceptionMessages $ withSocketsDo $ do where run ps n = case takeFileName n of "git-annex-shell" -> CmdLine.GitAnnexShell.run ps + "git-remote-annex" -> CmdLine.GitRemoteAnnex.run ps "git-remote-tor-annex" -> CmdLine.GitRemoteTorAnnex.run ps _ -> CmdLine.GitAnnex.run Test.optParser Test.runner Benchmark.mkGenerator ps