git-annex/Remote/Bup.hs
Joey Hess 1243af4a18
toward SafeDropProof expiry checking
Added Maybe POSIXTime to SafeDropProof, which gets set when the proof is
based on a LockedCopy. If there are several LockedCopies, it uses the
closest expiry time. That is not optimal, it may be that the proof
expires based on one LockedCopy but another one has not expired. But
that seems unlikely to really happen, and anyway the user can just
re-run a drop if it fails due to expiry.

Pass the SafeDropProof to removeKey, which is responsible for checking
it for expiry in situations where that could be a problem. Which really
only means in Remote.Git.

Made Remote.Git check expiry when dropping from a local remote.

Checking expiry when dropping from a P2P remote is not yet implemented.
P2P.Protocol.remove has SafeDropProof plumbed through to it for that
purpose.

Fixing the remaining 2 build warnings should complete this work.

Note that the use of a POSIXTime here means that if the clock gets set
forward while git-annex is in the middle of a drop, it may say that
dropping took too long. That seems ok. Less ok is that if the clock gets
turned back a sufficient amount (eg 5 minutes), proof expiry won't be
noticed. It might be better to use the Monotonic clock, but that doesn't
advance when a laptop is suspended, and while there is the linux
Boottime clock, that is not available on other systems. Perhaps a
combination of POSIXTime and the Monotonic clock could detect laptop
suspension and also detect clock being turned back?

There is a potential future flag day where
p2pDefaultLockContentRetentionDuration is not assumed, but is probed
using the P2P protocol, and peers that don't support it can no longer
produce a LockedCopy. Until that happens, when git-annex is
communicating with older peers there is a risk of data loss when
a ssh connection closes during LOCKCONTENT.
2024-07-04 12:39:06 -04:00

343 lines
11 KiB
Haskell

{- Using bup as a remote.
-
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE RankNTypes, OverloadedStrings #-}
module Remote.Bup (remote) where
import qualified Data.Map as M
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified System.FilePath.ByteString as P
import Data.ByteString.Lazy.UTF8 (fromString)
import Control.Concurrent.Async
import Annex.Common
import qualified Annex
import Types.Remote
import Types.Creds
import Git.Types (ConfigValue(..), fromConfigKey)
import qualified Git
import qualified Git.Command
import qualified Git.Config
import qualified Git.Construct
import qualified Git.Ref
import Config
import Config.Cost
import qualified Remote.Helper.Ssh as Ssh
import Annex.SpecialRemote.Config
import Remote.Helper.Special
import Remote.Helper.ExportImport
import Remote.Helper.Path
import Utility.Hash
import Utility.UserInfo
import Annex.UUID
import Annex.Ssh
import Annex.LockFile
import Annex.Perms
import Utility.Metered
import Types.ProposedAccepted
type BupRepo = String
remote :: RemoteType
remote = specialRemoteType $ RemoteType
{ typename = "bup"
, enumerate = const (findSpecialRemotes "buprepo")
, generate = gen
, configParser = mkRemoteConfigParser
[ optionalStringParser buprepoField
(FieldDesc "(required) bup repository to use")
]
, setup = bupSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
buprepoField :: RemoteConfigField
buprepoField = Accepted "buprepo"
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
gen r u rc gc rs = do
c <- parsedRemoteConfig remote rc
bupr <- liftIO $ bup2GitRemote buprepo
cst <- remoteCost gc c $
if bupLocal buprepo
then nearlyCheapRemoteCost
else expensiveRemoteCost
(u', bupr') <- getBupUUID bupr u
let this = Remote
{ uuid = u'
, cost = cst
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileCheap = Nothing
-- Bup uses git, which cryptographically verifies content
-- (with SHA1, but sufficiently for this).
, retrievalSecurityPolicy = RetrievalAllKeysSecure
, removeKey = removeKeyDummy
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = bupLocal buprepo
, exportActions = exportUnsupported
, importActions = importUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
, config = c
, getRepo = return r
, gitconfig = gc
, localpath = if bupLocal buprepo && not (null buprepo)
then Just buprepo
else Nothing
, remotetype = remote
, availability = if null buprepo
then pure LocallyAvailable
else checkPathAvailability (bupLocal buprepo) buprepo
, readonly = False
, appendonly = False
, untrustworthy = False
, mkUnavailable = return Nothing
, getInfo = return [("repo", buprepo)]
, claimUrl = Nothing
, checkUrl = Nothing
, remoteStateHandle = rs
}
let specialcfg = (specialRemoteCfg c)
-- chunking would not improve bup
{ chunkConfig = NoChunks
}
return $ Just $ specialRemote' specialcfg c
(store this buprepo)
(retrieve this buprepo)
(remove buprepo)
(checkKey bupr')
this
where
buprepo = fromMaybe (giveup "missing buprepo") $ remoteAnnexBupRepo gc
bupSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
bupSetup _ mu _ c gc = do
u <- maybe (liftIO genUUID) return mu
-- verify configuration is sane
let buprepo = maybe (giveup "Specify buprepo=") fromProposedAccepted $
M.lookup buprepoField c
(c', _encsetup) <- encryptionSetup c gc
-- bup init will create the repository.
-- (If the repository already exists, bup init again appears safe.)
showAction "bup init"
unlessM (bup "init" buprepo []) $ giveup "bup init failed"
storeBupUUID u buprepo
-- The buprepo is stored in git config, as well as this repo's
-- persistent state, so it can vary between hosts.
gitConfigSpecialRemote u c' [("buprepo", buprepo)]
return (c', u)
bupParams :: String -> BupRepo -> [CommandParam] -> [CommandParam]
bupParams command buprepo params =
Param command : [Param "-r", Param buprepo] ++ params
bup :: String -> BupRepo -> [CommandParam] -> Annex Bool
bup command buprepo params = do
showOutput -- make way for bup output
liftIO $ boolSystem "bup" $ bupParams command buprepo params
bupSplitParams :: Remote -> BupRepo -> Key -> [CommandParam] -> [CommandParam]
bupSplitParams r buprepo k src =
let os = map Param $ remoteAnnexBupSplitOptions $ gitconfig r
in bupParams "split" buprepo
(os ++ [Param "-q", Param "-n", Param (bupRef k)] ++ src)
store :: Remote -> BupRepo -> Storer
store r buprepo = byteStorer $ \k b p -> lockBup True r $ do
liftIO $ withNullHandle $ \nullh ->
let params = bupSplitParams r buprepo k []
cmd = (proc "bup" (toCommand params))
{ std_in = CreatePipe
, std_out = UseHandle nullh
, std_err = CreatePipe
}
feeder = \h -> do
meteredWrite p (S.hPut h) b
hClose h
in withCreateProcess cmd (go feeder cmd)
where
go feeder p (Just inh) _ (Just errh) pid = do
-- bup split is noisy to stderr even with the -q
-- option. But when bup fails, the stderr needs
-- to be displayed.
(feedresult, erroutput) <- tryNonAsync (feeder inh)
`concurrently` hGetContentsStrict errh
waitForProcess pid >>= \case
ExitSuccess -> case feedresult of
Right () -> return ()
Left e -> throwM e
ExitFailure n -> giveup $
showCmd p ++ " exited " ++ show n ++
" (stderr output: " ++ erroutput ++ ")"
go _ _ _ _ _ _ = error "internal"
retrieve :: Remote -> BupRepo -> Retriever
retrieve r buprepo = byteRetriever $ \k sink -> lockBup True r $ do
let params = bupParams "join" buprepo [Param $ bupRef k]
let p = (proc "bup" (toCommand params))
{ std_out = CreatePipe }
bracketIO (createProcess p) cleanupProcess (go sink p)
where
go sink p (_, Just h, _, pid) = do
v <- sink =<< liftIO (L.hGetContents h)
liftIO $ do
hClose h
forceSuccessProcess p pid
return v
go _ _ _ = error "internal"
{- Cannot revert having stored a key in bup, but at least the data for the
- key will be used for deltaing data of other keys stored later.
-
- We can, however, remove the git branch that bup created for the key.
-}
remove :: BupRepo -> Remover
remove buprepo _proof k = do
go =<< liftIO (bup2GitRemote buprepo)
warning "content cannot be completely removed from bup remote"
where
go r
| Git.repoIsUrl r = void $ onBupRemote r boolSystem "git" params
| otherwise = void $ liftIO $ catchMaybeIO $ do
r' <- Git.Config.read r
boolSystem "git" $ Git.Command.gitCommandLine params r'
params = [ Param "branch", Param "-q", Param "-D", Param (bupRef k) ]
{- Bup does not provide a way to tell if a given dataset is present
- in a bup repository. One way it to check if the git repository has
- a branch matching the name (as created by bup split -n).
-}
checkKey :: Git.Repo -> CheckPresent
checkKey bupr k
| Git.repoIsUrl bupr = onBupRemote bupr boolSystem "git" params
| otherwise = liftIO $ boolSystem "git" $
Git.Command.gitCommandLine params bupr
where
params =
[ Param "show-ref"
, Param "--quiet"
, Param "--verify"
, Param $ "refs/heads/" ++ bupRef k
]
{- Store UUID in the annex.uuid setting of the bup repository. -}
storeBupUUID :: UUID -> BupRepo -> Annex ()
storeBupUUID u buprepo = do
r <- liftIO $ bup2GitRemote buprepo
if Git.repoIsUrl r
then do
showAction "storing uuid"
unlessM (onBupRemote r boolSystem "git"
[Param "config", Param (fromConfigKey configkeyUUID), Param v]) $
giveup "ssh failed"
else liftIO $ do
r' <- Git.Config.read r
let noolduuid = case Git.Config.get configkeyUUID mempty r' of
ConfigValue olduuid -> S.null olduuid
NoConfigValue -> True
when noolduuid $
Git.Command.run
[ Param "config"
, Param "annex.uuid"
, Param v
] r'
where
v = fromUUID u
onBupRemote :: Git.Repo -> (FilePath -> [CommandParam] -> IO a) -> FilePath -> [CommandParam] -> Annex a
onBupRemote r runner command params = do
c <- Annex.getRemoteGitConfig r
let remotecmd = "cd " ++ dir ++ " && " ++ unwords (command : toCommand params)
(sshcmd, sshparams) <- Ssh.toRepo NoConsumeStdin r c remotecmd
liftIO $ runner sshcmd sshparams
where
path = fromRawFilePath $ Git.repoPath r
base = fromMaybe path (stripPrefix "/~/" path)
dir = shellEscape base
{- Allow for bup repositories on removable media by checking
- local bup repositories to see if they are available, and getting their
- uuid (which may be different from the stored uuid for the bup remote).
-
- If a bup repository is not available, returns NoUUID.
- This will cause checkPresent to indicate nothing from the bup remote
- is known to be present.
-
- Also, returns a version of the repo with config read, if it is local.
-}
getBupUUID :: Git.Repo -> UUID -> Annex (UUID, Git.Repo)
getBupUUID r u
| Git.repoIsUrl r = return (u, r)
| otherwise = liftIO $ do
ret <- tryIO $ Git.Config.read r
case ret of
Right r' -> return (toUUID $ Git.Config.get configkeyUUID mempty r', r')
Left _ -> return (NoUUID, r)
{- Converts a bup remote path spec into a Git.Repo. There are some
- differences in path representation between git and bup. -}
bup2GitRemote :: BupRepo -> IO Git.Repo
bup2GitRemote "" = do
-- bup -r "" operates on ~/.bup
h <- myHomeDir
Git.Construct.fromPath $ toRawFilePath $ h </> ".bup"
bup2GitRemote r
| bupLocal r =
if "/" `isPrefixOf` r
then Git.Construct.fromPath (toRawFilePath r)
else giveup "please specify an absolute path"
| otherwise = Git.Construct.fromUrl $ "ssh://" ++ host ++ slash dir
where
bits = splitc ':' r
host = Prelude.head bits
dir = intercalate ":" $ drop 1 bits
-- "host:~user/dir" is not supported specially by bup;
-- "host:dir" is relative to the home directory;
-- "host:" goes in ~/.bup
slash d
| null d = "/~/.bup"
| "/" `isPrefixOf` d = d
| otherwise = "/~/" ++ d
{- Converts a key into a git ref name, which bup-split -n will use to point
- to it. -}
bupRef :: Key -> String
bupRef k
| Git.Ref.legal True shown = shown
| otherwise = "git-annex-" ++ show (sha2_256 (fromString shown))
where
shown = serializeKey k
bupLocal :: BupRepo -> Bool
bupLocal = notElem ':'
{- Bup is not concurrency safe, so use a lock file. Only one writer process
- should run at a time; multiple readers may run if no writer is running. -}
lockBup :: Bool -> Remote -> Annex a -> Annex a
lockBup writer r a = do
dir <- fromRepo gitAnnexRemotesDir
unlessM (liftIO $ doesDirectoryExist (fromRawFilePath dir)) $
createAnnexDirectory dir
let remoteid = fromUUID (uuid r)
let lck = dir P.</> remoteid <> ".lck"
if writer
then withExclusiveLock lck a
else withSharedLock lck a