abd417d4fe
Since bup split is not concurrency safe. Used a lock file so that 2 git-annex processes only run one bup split between them (per bup repo). (Concurrent writes from different git-annex repository clones to the same bup repo could still have concurrency problems.) Sponsored-by: Noam Kremen on Patreon
338 lines
10 KiB
Haskell
338 lines
10 KiB
Haskell
{- Using bup as a remote.
|
|
-
|
|
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
|
|
-
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
-}
|
|
|
|
{-# LANGUAGE RankNTypes, OverloadedStrings #-}
|
|
|
|
module Remote.Bup (remote) where
|
|
|
|
import qualified Data.Map as M
|
|
import qualified Data.ByteString as S
|
|
import qualified Data.ByteString.Lazy as L
|
|
import qualified System.FilePath.ByteString as P
|
|
import Data.ByteString.Lazy.UTF8 (fromString)
|
|
import Control.Concurrent.Async
|
|
|
|
import Annex.Common
|
|
import qualified Annex
|
|
import Types.Remote
|
|
import Types.Creds
|
|
import Git.Types (ConfigValue(..), fromConfigKey)
|
|
import qualified Git
|
|
import qualified Git.Command
|
|
import qualified Git.Config
|
|
import qualified Git.Construct
|
|
import qualified Git.Ref
|
|
import Config
|
|
import Config.Cost
|
|
import qualified Remote.Helper.Ssh as Ssh
|
|
import Annex.SpecialRemote.Config
|
|
import Remote.Helper.Special
|
|
import Remote.Helper.ExportImport
|
|
import Utility.Hash
|
|
import Utility.UserInfo
|
|
import Annex.UUID
|
|
import Annex.Ssh
|
|
import Annex.LockFile
|
|
import Annex.Perms
|
|
import Utility.Metered
|
|
import Types.ProposedAccepted
|
|
|
|
type BupRepo = String
|
|
|
|
remote :: RemoteType
|
|
remote = specialRemoteType $ RemoteType
|
|
{ typename = "bup"
|
|
, enumerate = const (findSpecialRemotes "buprepo")
|
|
, generate = gen
|
|
, configParser = mkRemoteConfigParser
|
|
[ optionalStringParser buprepoField
|
|
(FieldDesc "(required) bup repository to use")
|
|
]
|
|
, setup = bupSetup
|
|
, exportSupported = exportUnsupported
|
|
, importSupported = importUnsupported
|
|
, thirdPartyPopulated = False
|
|
}
|
|
|
|
buprepoField :: RemoteConfigField
|
|
buprepoField = Accepted "buprepo"
|
|
|
|
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
|
|
gen r u rc gc rs = do
|
|
c <- parsedRemoteConfig remote rc
|
|
bupr <- liftIO $ bup2GitRemote buprepo
|
|
cst <- remoteCost gc $
|
|
if bupLocal buprepo
|
|
then nearlyCheapRemoteCost
|
|
else expensiveRemoteCost
|
|
(u', bupr') <- getBupUUID bupr u
|
|
|
|
let this = Remote
|
|
{ uuid = u'
|
|
, cost = cst
|
|
, name = Git.repoDescribe r
|
|
, storeKey = storeKeyDummy
|
|
, retrieveKeyFile = retrieveKeyFileDummy
|
|
, retrieveKeyFileCheap = Nothing
|
|
-- Bup uses git, which cryptographically verifies content
|
|
-- (with SHA1, but sufficiently for this).
|
|
, retrievalSecurityPolicy = RetrievalAllKeysSecure
|
|
, removeKey = removeKeyDummy
|
|
, lockContent = Nothing
|
|
, checkPresent = checkPresentDummy
|
|
, checkPresentCheap = bupLocal buprepo
|
|
, exportActions = exportUnsupported
|
|
, importActions = importUnsupported
|
|
, whereisKey = Nothing
|
|
, remoteFsck = Nothing
|
|
, repairRepo = Nothing
|
|
, config = c
|
|
, getRepo = return r
|
|
, gitconfig = gc
|
|
, localpath = if bupLocal buprepo && not (null buprepo)
|
|
then Just buprepo
|
|
else Nothing
|
|
, remotetype = remote
|
|
, availability = if bupLocal buprepo then LocallyAvailable else GloballyAvailable
|
|
, readonly = False
|
|
, appendonly = False
|
|
, untrustworthy = False
|
|
, mkUnavailable = return Nothing
|
|
, getInfo = return [("repo", buprepo)]
|
|
, claimUrl = Nothing
|
|
, checkUrl = Nothing
|
|
, remoteStateHandle = rs
|
|
}
|
|
let specialcfg = (specialRemoteCfg c)
|
|
-- chunking would not improve bup
|
|
{ chunkConfig = NoChunks
|
|
}
|
|
return $ Just $ specialRemote' specialcfg c
|
|
(store this buprepo)
|
|
(retrieve buprepo)
|
|
(remove buprepo)
|
|
(checkKey bupr')
|
|
this
|
|
where
|
|
buprepo = fromMaybe (giveup "missing buprepo") $ remoteAnnexBupRepo gc
|
|
|
|
bupSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
|
|
bupSetup _ mu _ c gc = do
|
|
u <- maybe (liftIO genUUID) return mu
|
|
|
|
-- verify configuration is sane
|
|
let buprepo = maybe (giveup "Specify buprepo=") fromProposedAccepted $
|
|
M.lookup buprepoField c
|
|
(c', _encsetup) <- encryptionSetup c gc
|
|
|
|
-- bup init will create the repository.
|
|
-- (If the repository already exists, bup init again appears safe.)
|
|
showAction "bup init"
|
|
unlessM (bup "init" buprepo []) $ giveup "bup init failed"
|
|
|
|
storeBupUUID u buprepo
|
|
|
|
-- The buprepo is stored in git config, as well as this repo's
|
|
-- persistant state, so it can vary between hosts.
|
|
gitConfigSpecialRemote u c' [("buprepo", buprepo)]
|
|
|
|
return (c', u)
|
|
|
|
bupParams :: String -> BupRepo -> [CommandParam] -> [CommandParam]
|
|
bupParams command buprepo params =
|
|
Param command : [Param "-r", Param buprepo] ++ params
|
|
|
|
bup :: String -> BupRepo -> [CommandParam] -> Annex Bool
|
|
bup command buprepo params = do
|
|
showOutput -- make way for bup output
|
|
liftIO $ boolSystem "bup" $ bupParams command buprepo params
|
|
|
|
bupSplitParams :: Remote -> BupRepo -> Key -> [CommandParam] -> [CommandParam]
|
|
bupSplitParams r buprepo k src =
|
|
let os = map Param $ remoteAnnexBupSplitOptions $ gitconfig r
|
|
in bupParams "split" buprepo
|
|
(os ++ [Param "-q", Param "-n", Param (bupRef k)] ++ src)
|
|
|
|
store :: Remote -> BupRepo -> Storer
|
|
store r buprepo = byteStorer $ \k b p -> lockBup r $ do
|
|
liftIO $ withNullHandle $ \nullh ->
|
|
let params = bupSplitParams r buprepo k []
|
|
cmd = (proc "bup" (toCommand params))
|
|
{ std_in = CreatePipe
|
|
, std_out = UseHandle nullh
|
|
, std_err = CreatePipe
|
|
}
|
|
feeder = \h -> do
|
|
meteredWrite p (S.hPut h) b
|
|
hClose h
|
|
in withCreateProcess cmd (go feeder cmd)
|
|
where
|
|
go feeder p (Just inh) _ (Just errh) pid = do
|
|
-- bup split is noisy to stderr even with the -q
|
|
-- option. But when bup fails, the stderr needs
|
|
-- to be displayed.
|
|
(feedresult, erroutput) <- tryNonAsync (feeder inh)
|
|
`concurrently` hGetContentsStrict errh
|
|
waitForProcess pid >>= \case
|
|
ExitSuccess -> case feedresult of
|
|
Right () -> return ()
|
|
Left e -> throwM e
|
|
ExitFailure n -> giveup $
|
|
showCmd p ++ " exited " ++ show n ++
|
|
" (stderr output: " ++ erroutput ++ ")"
|
|
go _ _ _ _ _ _ = error "internal"
|
|
|
|
{- Bup is not concurrency safe, so use a lock file to prevent more than
|
|
- one process from running. -}
|
|
lockBup :: Remote -> Annex a -> Annex a
|
|
lockBup r a = do
|
|
dir <- fromRepo gitAnnexRemotesDir
|
|
unlessM (liftIO $ doesDirectoryExist (fromRawFilePath dir)) $
|
|
createAnnexDirectory dir
|
|
let remoteid = fromUUID (uuid r)
|
|
let lck = dir P.</> remoteid <> ".lck"
|
|
withExclusiveLock (const lck) a
|
|
|
|
retrieve :: BupRepo -> Retriever
|
|
retrieve buprepo = byteRetriever $ \k sink -> do
|
|
let params = bupParams "join" buprepo [Param $ bupRef k]
|
|
let p = (proc "bup" (toCommand params))
|
|
{ std_out = CreatePipe }
|
|
bracketIO (createProcess p) cleanupProcess (go sink p)
|
|
where
|
|
go sink p (_, Just h, _, pid) = do
|
|
r <- sink =<< liftIO (L.hGetContents h)
|
|
liftIO $ do
|
|
hClose h
|
|
forceSuccessProcess p pid
|
|
return r
|
|
go _ _ _ = error "internal"
|
|
|
|
{- Cannot revert having stored a key in bup, but at least the data for the
|
|
- key will be used for deltaing data of other keys stored later.
|
|
-
|
|
- We can, however, remove the git branch that bup created for the key.
|
|
-}
|
|
remove :: BupRepo -> Remover
|
|
remove buprepo k = do
|
|
go =<< liftIO (bup2GitRemote buprepo)
|
|
warning "content cannot be completely removed from bup remote"
|
|
where
|
|
go r
|
|
| Git.repoIsUrl r = void $ onBupRemote r boolSystem "git" params
|
|
| otherwise = void $ liftIO $ catchMaybeIO $ do
|
|
r' <- Git.Config.read r
|
|
boolSystem "git" $ Git.Command.gitCommandLine params r'
|
|
params = [ Param "branch", Param "-q", Param "-D", Param (bupRef k) ]
|
|
|
|
{- Bup does not provide a way to tell if a given dataset is present
|
|
- in a bup repository. One way it to check if the git repository has
|
|
- a branch matching the name (as created by bup split -n).
|
|
-}
|
|
checkKey :: Git.Repo -> CheckPresent
|
|
checkKey bupr k
|
|
| Git.repoIsUrl bupr = onBupRemote bupr boolSystem "git" params
|
|
| otherwise = liftIO $ boolSystem "git" $
|
|
Git.Command.gitCommandLine params bupr
|
|
where
|
|
params =
|
|
[ Param "show-ref"
|
|
, Param "--quiet"
|
|
, Param "--verify"
|
|
, Param $ "refs/heads/" ++ bupRef k
|
|
]
|
|
|
|
{- Store UUID in the annex.uuid setting of the bup repository. -}
|
|
storeBupUUID :: UUID -> BupRepo -> Annex ()
|
|
storeBupUUID u buprepo = do
|
|
r <- liftIO $ bup2GitRemote buprepo
|
|
if Git.repoIsUrl r
|
|
then do
|
|
showAction "storing uuid"
|
|
unlessM (onBupRemote r boolSystem "git"
|
|
[Param "config", Param (fromConfigKey configkeyUUID), Param v]) $
|
|
giveup "ssh failed"
|
|
else liftIO $ do
|
|
r' <- Git.Config.read r
|
|
let noolduuid = case Git.Config.get configkeyUUID mempty r' of
|
|
ConfigValue olduuid -> S.null olduuid
|
|
NoConfigValue -> True
|
|
when noolduuid $
|
|
Git.Command.run
|
|
[ Param "config"
|
|
, Param "annex.uuid"
|
|
, Param v
|
|
] r'
|
|
where
|
|
v = fromUUID u
|
|
|
|
onBupRemote :: Git.Repo -> (FilePath -> [CommandParam] -> IO a) -> FilePath -> [CommandParam] -> Annex a
|
|
onBupRemote r runner command params = do
|
|
c <- Annex.getRemoteGitConfig r
|
|
let remotecmd = "cd " ++ dir ++ " && " ++ unwords (command : toCommand params)
|
|
(sshcmd, sshparams) <- Ssh.toRepo NoConsumeStdin r c remotecmd
|
|
liftIO $ runner sshcmd sshparams
|
|
where
|
|
path = fromRawFilePath $ Git.repoPath r
|
|
base = fromMaybe path (stripPrefix "/~/" path)
|
|
dir = shellEscape base
|
|
|
|
{- Allow for bup repositories on removable media by checking
|
|
- local bup repositories to see if they are available, and getting their
|
|
- uuid (which may be different from the stored uuid for the bup remote).
|
|
-
|
|
- If a bup repository is not available, returns NoUUID.
|
|
- This will cause checkPresent to indicate nothing from the bup remote
|
|
- is known to be present.
|
|
-
|
|
- Also, returns a version of the repo with config read, if it is local.
|
|
-}
|
|
getBupUUID :: Git.Repo -> UUID -> Annex (UUID, Git.Repo)
|
|
getBupUUID r u
|
|
| Git.repoIsUrl r = return (u, r)
|
|
| otherwise = liftIO $ do
|
|
ret <- tryIO $ Git.Config.read r
|
|
case ret of
|
|
Right r' -> return (toUUID $ Git.Config.get configkeyUUID mempty r', r')
|
|
Left _ -> return (NoUUID, r)
|
|
|
|
{- Converts a bup remote path spec into a Git.Repo. There are some
|
|
- differences in path representation between git and bup. -}
|
|
bup2GitRemote :: BupRepo -> IO Git.Repo
|
|
bup2GitRemote "" = do
|
|
-- bup -r "" operates on ~/.bup
|
|
h <- myHomeDir
|
|
Git.Construct.fromPath $ toRawFilePath $ h </> ".bup"
|
|
bup2GitRemote r
|
|
| bupLocal r =
|
|
if "/" `isPrefixOf` r
|
|
then Git.Construct.fromPath (toRawFilePath r)
|
|
else giveup "please specify an absolute path"
|
|
| otherwise = Git.Construct.fromUrl $ "ssh://" ++ host ++ slash dir
|
|
where
|
|
bits = splitc ':' r
|
|
host = Prelude.head bits
|
|
dir = intercalate ":" $ drop 1 bits
|
|
-- "host:~user/dir" is not supported specially by bup;
|
|
-- "host:dir" is relative to the home directory;
|
|
-- "host:" goes in ~/.bup
|
|
slash d
|
|
| null d = "/~/.bup"
|
|
| "/" `isPrefixOf` d = d
|
|
| otherwise = "/~/" ++ d
|
|
|
|
{- Converts a key into a git ref name, which bup-split -n will use to point
|
|
- to it. -}
|
|
bupRef :: Key -> String
|
|
bupRef k
|
|
| Git.Ref.legal True shown = shown
|
|
| otherwise = "git-annex-" ++ show (sha2_256 (fromString shown))
|
|
where
|
|
shown = serializeKey k
|
|
|
|
bupLocal :: BupRepo -> Bool
|
|
bupLocal = notElem ':'
|