git-annex/Remote/Ddar.hs

240 lines
7.6 KiB
Haskell
Raw Normal View History

2014-05-15 18:44:00 +00:00
{- Using ddar as a remote. Based on bup and rsync remotes.
-
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
2014-05-15 18:44:00 +00:00
- Copyright 2014 Robie Basak <robie@justgohome.co.uk>
-
- Licensed under the GNU GPL version 3 or higher.
2014-05-15 18:44:00 +00:00
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE RankNTypes #-}
2014-05-15 18:44:00 +00:00
module Remote.Ddar (remote) where
import qualified Data.Map as M
import qualified Data.ByteString.Lazy as L
2014-05-15 18:44:00 +00:00
import System.IO.Error
import System.PosixCompat.Files (isDirectory)
2014-05-15 18:44:00 +00:00
import Annex.Common
2014-05-15 18:44:00 +00:00
import Types.Remote
import Types.Creds
import qualified Git
import Config
import Config.Cost
import Annex.SpecialRemote.Config
2014-05-15 18:44:00 +00:00
import Remote.Helper.Special
2019-02-20 19:55:01 +00:00
import Remote.Helper.ExportImport
import Remote.Helper.Path
2014-05-15 18:44:00 +00:00
import Annex.Ssh
import Annex.UUID
import Utility.SshHost
import Types.ProposedAccepted
import qualified Utility.RawFilePath as R
2014-05-15 18:44:00 +00:00
data DdarRepo = DdarRepo
{ ddarRepoConfig :: RemoteGitConfig
, ddarRepoLocation :: String
}
2014-05-15 18:44:00 +00:00
remote :: RemoteType
remote = specialRemoteType $ RemoteType
{ typename = "ddar"
, enumerate = const (findSpecialRemotes "ddarrepo")
, generate = gen
, configParser = mkRemoteConfigParser
[ optionalStringParser ddarrepoField
(FieldDesc "(required) location of ddar archive to use")
]
, setup = ddarSetup
, exportSupported = exportUnsupported
2019-02-20 19:55:01 +00:00
, importSupported = importUnsupported
add thirdPartyPopulated interface This is to support, eg a borg repo as a special remote, which is populated not by running git-annex commands, but by using borg. Then git-annex sync lists the content of the remote, learns which files are annex objects, and treats those as present in the remote. So, most of the import machinery is reused, to a new purpose. While normally importtree maintains a remote tracking branch, this does not, because the files stored in the remote are annex object files, not user-visible filenames. But, internally, a git tree is still generated, of the files on the remote that are annex objects. This tree is used by retrieveExportWithContentIdentifier, etc. As with other import/export remotes, that the tree is recorded in the export log, and gets grafted into the git-annex branch. importKey changed to be able to return Nothing, to indicate when an ImportLocation is not an annex object and so should be skipped from being included in the tree. It did not seem to make sense to have git-annex import do this, since from the user's perspective, it's not like other imports. So only git-annex sync does it. Note that, git-annex sync does not yet download objects from such remotes that are preferred content. importKeys is run with content downloading disabled, to avoid getting the content of all objects. Perhaps what's needed is for seekSyncContent to be run with these remotes, but I don't know if it will just work (in particular, it needs to avoid trying to transfer objects to them), so I skipped that for now. (Untested and unused as of yet.) This commit was sponsored by Jochen Bartl on Patreon.
2020-12-18 18:52:57 +00:00
, thirdPartyPopulated = False
}
2014-05-15 18:44:00 +00:00
ddarrepoField :: RemoteConfigField
ddarrepoField = Accepted "ddarrepo"
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
gen r u rc gc rs = do
c <- parsedRemoteConfig remote rc
cst <- remoteCost gc c $
2014-05-15 18:44:00 +00:00
if ddarLocal ddarrepo
then nearlyCheapRemoteCost
else expensiveRemoteCost
let specialcfg = (specialRemoteCfg c)
-- chunking would not improve ddar
{ chunkConfig = NoChunks
}
return $ Just $ specialRemote' specialcfg c
(store ddarrepo)
(retrieve ddarrepo)
(remove ddarrepo)
(checkKey ddarrepo)
(this c cst)
where
this c cst = Remote
2014-05-15 18:44:00 +00:00
{ uuid = u
, cost = cst
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileCheap = Nothing
-- ddar communicates over ssh, not subject to http redirect
-- type attacks
, retrievalSecurityPolicy = RetrievalAllKeysSecure
, removeKey = removeKeyDummy
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = ddarLocal ddarrepo
, exportActions = exportUnsupported
2019-02-20 19:55:01 +00:00
, importActions = importUnsupported
2014-05-15 18:44:00 +00:00
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
, config = c
, getRepo = return r
2014-05-15 18:44:00 +00:00
, gitconfig = gc
, localpath = if ddarLocal ddarrepo && not (null $ ddarRepoLocation ddarrepo)
then Just $ ddarRepoLocation ddarrepo
2014-05-15 18:44:00 +00:00
else Nothing
, remotetype = remote
, availability = checkPathAvailability
(ddarLocal ddarrepo && not (null $ ddarRepoLocation ddarrepo))
(ddarRepoLocation ddarrepo)
2014-05-15 18:44:00 +00:00
, readonly = False
, appendonly = False
, untrustworthy = False
, mkUnavailable = return Nothing
, getInfo = return [("repo", ddarRepoLocation ddarrepo)]
2014-12-08 17:40:15 +00:00
, claimUrl = Nothing
, checkUrl = Nothing
, remoteStateHandle = rs
2014-05-15 18:44:00 +00:00
}
ddarrepo = maybe (giveup "missing ddarrepo") (DdarRepo gc) (remoteAnnexDdarRepo gc)
2014-05-15 18:44:00 +00:00
ddarSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
ddarSetup _ mu _ c gc = do
2014-05-15 18:44:00 +00:00
u <- maybe (liftIO genUUID) return mu
-- verify configuration is sane
let ddarrepo = maybe (giveup "Specify ddarrepo=") fromProposedAccepted $
M.lookup ddarrepoField c
(c', _encsetup) <- encryptionSetup c gc
2014-05-15 18:44:00 +00:00
-- The ddarrepo is stored in git config, as well as this repo's
2023-03-14 02:39:16 +00:00
-- persistent state, so it can vary between hosts.
gitConfigSpecialRemote u c' [("ddarrepo", ddarrepo)]
2014-05-15 18:44:00 +00:00
return (c', u)
store :: DdarRepo -> Storer
store ddarrepo = fileStorer $ \k src _p -> do
2014-05-15 18:44:00 +00:00
let params =
[ Param "c"
, Param "-N"
, Param $ serializeKey k
, Param $ ddarRepoLocation ddarrepo
2014-05-15 18:44:00 +00:00
, File src
]
unlessM (liftIO $ boolSystem "ddar" params) $
giveup "ddar failed"
2014-05-15 18:44:00 +00:00
{- Convert remote DdarRepo to host and path on remote end -}
splitRemoteDdarRepo :: DdarRepo -> (SshHost, String)
splitRemoteDdarRepo ddarrepo = (either giveup id $ mkSshHost host, ddarrepo')
2014-05-15 18:44:00 +00:00
where
(host, remainder) = span (/= ':') (ddarRepoLocation ddarrepo)
2014-05-15 18:44:00 +00:00
ddarrepo' = drop 1 remainder
{- Return the command and parameters to use for a ddar call that may need to be
- made on a remote repository. This will call ssh if needed. -}
ddarRemoteCall :: ConsumeStdin -> DdarRepo -> Char -> [CommandParam] -> Annex (String, [CommandParam])
ddarRemoteCall cs ddarrepo cmd params
2014-05-15 18:44:00 +00:00
| ddarLocal ddarrepo = return ("ddar", localParams)
| otherwise = sshCommand cs (host, Nothing) (ddarRepoConfig ddarrepo) remoteCommand
2014-05-15 18:44:00 +00:00
where
(host, ddarrepo') = splitRemoteDdarRepo ddarrepo
localParams = Param [cmd] : Param (ddarRepoLocation ddarrepo) : params
remoteCommand = unwords $ map shellEscape $ toCommand $
[Param "ddar", Param [cmd], Param ddarrepo'] ++ params
2014-05-15 18:44:00 +00:00
{- Specialized ddarRemoteCall that includes extraction command and flags -}
ddarExtractRemoteCall :: ConsumeStdin -> DdarRepo -> Key -> Annex (String, [CommandParam])
ddarExtractRemoteCall cs ddarrepo k =
ddarRemoteCall cs ddarrepo 'x' [Param "--force-stdout", Param $ serializeKey k]
2014-05-15 18:44:00 +00:00
retrieve :: DdarRepo -> Retriever
retrieve ddarrepo = byteRetriever $ \k sink -> do
(cmd, params) <- ddarExtractRemoteCall NoConsumeStdin ddarrepo k
let p = (proc cmd $ toCommand params)
{ std_out = CreatePipe }
bracketIO (createProcess p) cleanupProcess (go sink p)
where
go sink p (_, Just h, _, pid) = do
r <- sink =<< liftIO (L.hGetContents h)
liftIO $ do
hClose h
forceSuccessProcess p pid
return r
go _ _ _ = error "internal"
2014-05-15 18:44:00 +00:00
remove :: DdarRepo -> Remover
toward SafeDropProof expiry checking Added Maybe POSIXTime to SafeDropProof, which gets set when the proof is based on a LockedCopy. If there are several LockedCopies, it uses the closest expiry time. That is not optimal, it may be that the proof expires based on one LockedCopy but another one has not expired. But that seems unlikely to really happen, and anyway the user can just re-run a drop if it fails due to expiry. Pass the SafeDropProof to removeKey, which is responsible for checking it for expiry in situations where that could be a problem. Which really only means in Remote.Git. Made Remote.Git check expiry when dropping from a local remote. Checking expiry when dropping from a P2P remote is not yet implemented. P2P.Protocol.remove has SafeDropProof plumbed through to it for that purpose. Fixing the remaining 2 build warnings should complete this work. Note that the use of a POSIXTime here means that if the clock gets set forward while git-annex is in the middle of a drop, it may say that dropping took too long. That seems ok. Less ok is that if the clock gets turned back a sufficient amount (eg 5 minutes), proof expiry won't be noticed. It might be better to use the Monotonic clock, but that doesn't advance when a laptop is suspended, and while there is the linux Boottime clock, that is not available on other systems. Perhaps a combination of POSIXTime and the Monotonic clock could detect laptop suspension and also detect clock being turned back? There is a potential future flag day where p2pDefaultLockContentRetentionDuration is not assumed, but is probed using the P2P protocol, and peers that don't support it can no longer produce a LockedCopy. Until that happens, when git-annex is communicating with older peers there is a risk of data loss when a ssh connection closes during LOCKCONTENT.
2024-07-04 16:23:46 +00:00
remove ddarrepo _proof key = do
(cmd, params) <- ddarRemoteCall NoConsumeStdin ddarrepo 'd'
[Param $ serializeKey key]
2020-05-14 18:08:09 +00:00
unlessM (liftIO $ boolSystem cmd params) $
giveup "ddar failed to remove"
2014-05-15 18:44:00 +00:00
ddarDirectoryExists :: DdarRepo -> Annex (Either String Bool)
ddarDirectoryExists ddarrepo
| ddarLocal ddarrepo = do
maybeStatus <- liftIO $ tryJust (guard . isDoesNotExistError) $
R.getSymbolicLinkStatus $ toRawFilePath $ ddarRepoLocation ddarrepo
2014-05-15 18:44:00 +00:00
return $ case maybeStatus of
Left _ -> Right False
Right status -> Right $ isDirectory status
| otherwise = do
let remotecmd = unwords $ map shellEscape
[ "test", "-d", ddarrepo' ]
(sshcmd, sshps) <- sshCommand NoConsumeStdin (host, Nothing)
(ddarRepoConfig ddarrepo) remotecmd
exitCode <- liftIO $ safeSystem sshcmd sshps
2014-05-15 18:44:00 +00:00
case exitCode of
ExitSuccess -> return $ Right True
ExitFailure 1 -> return $ Right False
ExitFailure code -> return $ Left $ "ssh " ++
show (unwords $ toCommand sshps) ++
2014-05-15 18:44:00 +00:00
" failed with status " ++ show code
where
(host, ddarrepo') = splitRemoteDdarRepo ddarrepo
{- Use "ddar t" to determine if a given key is present in a ddar archive -}
inDdarManifest :: DdarRepo -> Key -> Annex (Either String Bool)
inDdarManifest ddarrepo k = do
(cmd, params) <- ddarRemoteCall NoConsumeStdin ddarrepo 't' []
let p = (proc cmd $ toCommand params)
{ std_out = CreatePipe }
liftIO $ catchMsgIO $ withCreateProcess p (go p)
2014-05-15 18:44:00 +00:00
where
k' = serializeKey k
go p _ (Just hout) _ pid = do
contents <- hGetContents hout
let !r = elem k' (lines contents)
forceSuccessProcess p pid
return r
go _ _ _ _ _ = error "internal"
2014-05-15 18:44:00 +00:00
checkKey :: DdarRepo -> CheckPresent
checkKey ddarrepo key = do
2014-05-15 18:44:00 +00:00
directoryExists <- ddarDirectoryExists ddarrepo
case directoryExists of
Left e -> error e
Right True -> either giveup return
=<< inDdarManifest ddarrepo key
Right False -> return False
2014-05-15 18:44:00 +00:00
ddarLocal :: DdarRepo -> Bool
ddarLocal = notElem ':' . ddarRepoLocation