git-annex/Remote/Rsync.hs

307 lines
9.3 KiB
Haskell
Raw Normal View History

{- A remote that is only accessible by rsync.
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
2013-05-10 21:29:59 +00:00
{-# LANGUAGE CPP #-}
module Remote.Rsync (
remote,
storeEncrypted,
retrieveEncrypted,
remove,
checkPresent,
withRsyncScratchDir,
genRsyncOpts,
RsyncOpts
) where
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
#ifndef mingw32_HOST_OS
import System.Posix.Process (getProcessID)
#else
import System.Random (getStdRandom, random)
2013-05-10 21:29:59 +00:00
#endif
2011-10-05 20:02:51 +00:00
import Common.Annex
import Types.Remote
import qualified Git
import Config
import Config.Cost
2011-10-04 04:40:47 +00:00
import Annex.Content
import Annex.UUID
import Annex.Ssh
2011-08-17 00:49:54 +00:00
import Remote.Helper.Special
import Remote.Helper.Encryptable
import Crypto
2012-09-19 18:28:32 +00:00
import Utility.Rsync
import Utility.CopyFile
import Utility.Metered
import Annex.Perms
{- Where an rsync remote lives. Elsewhere in this module the value is
 - tested with rsyncUrlIsPath and rsyncUrlIsShell, so it is presumably
 - either a local path or a [user@]host:path style location. -}
type RsyncUrl = String

{- Everything needed to invoke rsync against one particular remote. -}
data RsyncOpts = RsyncOpts
    { rsyncUrl :: RsyncUrl
      -- ^ base location that the key urls are built under
    , rsyncOptions :: [CommandParam]
      -- ^ parameters passed on every rsync invocation
    , rsyncShellEscape :: Bool
      -- ^ whether file names get shell escaped (see rsyncEscape)
    }
{- The remote type for special remotes accessed with rsync.
 - Such remotes are enumerated via their "rsyncurl" setting. -}
remote :: RemoteType
remote = RemoteType
    { typename = "rsync"
    , enumerate = findSpecialRemotes "rsyncurl"
    , generate = gen
    , setup = rsyncSetup
    }
{- Builds the Remote value for an rsync special remote.
 -
 - The url is read from the remote's git config (remoteAnnexRsyncUrl);
 - it is an error for it to be unset. The unencrypted store/retrieve
 - actions are handed to encryptableRemote together with their
 - encrypted counterparts; which set ends up being used is decided by
 - encryptableRemote (presumably based on the remote config c).
 -}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex Remote
gen r u c gc = do
    remotecost <- remoteCost gc expensiveRemoteCost
    (transport, url) <- rsyncTransport gc configuredurl
    let opts = genRsyncOpts c gc transport url
    return $ encryptableRemote c
        (storeEncrypted opts $ getGpgEncParams (c, gc))
        (retrieveEncrypted opts)
        (unencrypted remotecost opts)
  where
    configuredurl =
        fromMaybe (error "missing rsyncurl") (remoteAnnexRsyncUrl gc)

    -- A remote whose url is a plain path is local to this machine.
    islocal = rsyncUrlIsPath . rsyncUrl

    pathof opts
        | islocal opts = Just (rsyncUrl opts)
        | otherwise = Nothing

    unencrypted remotecost opts = Remote
        { uuid = u
        , cost = remotecost
        , name = Git.repoDescribe r
        , storeKey = store opts
        , retrieveKeyFile = retrieve opts
        , retrieveKeyFileCheap = retrieveCheap opts
        , removeKey = remove opts
        , hasKey = checkPresent r opts
        , hasKeyCheap = False
        , whereisKey = Nothing
        , config = M.empty
        , repo = r
        , gitconfig = gc
        , localpath = pathof opts
        , readonly = False
        , globallyAvailable = not (islocal opts)
        , remotetype = remote
        }
{- Assembles the RsyncOpts for a remote.
 -
 - The rsync parameters are the transport parameters followed by the
 - user-configured rsync options (remoteAnnexRsyncOptions), minus any
 - that are unsafe. Shell escaping is enabled unless the remote config
 - has shellescape set to "no".
 -}
genRsyncOpts :: RemoteConfig -> RemoteGitConfig -> [CommandParam] -> RsyncUrl -> RsyncOpts
genRsyncOpts c gc transport url = RsyncOpts url (transport ++ opts) escape
  where
    opts = map Param $ filter safe $ remoteAnnexRsyncOptions gc
    escape = M.lookup "shellescape" c /= Just "no"
    -- Don't allow user to pass --delete to rsync;
    -- that could cause it to delete other keys
    -- in the same hash bucket as a key it sends.
    safe opt = opt `notElem` deleteopts
    -- Every spelling that turns on deletion at the destination:
    -- the --delete-WHEN variants (and the --del alias) each imply
    -- --delete, so blocking only "--delete" itself is not enough.
    deleteopts =
        [ "--del"
        , "--delete"
        , "--delete-before"
        , "--delete-during"
        , "--delete-delay"
        , "--delete-after"
        , "--delete-excluded"
        ]
{- Works out how rsync should reach the remote.
 -
 - Returns the rsync parameters selecting the remote shell, and the url
 - to hand to rsync. For a url that rsyncUrlIsShell does not recognise,
 - no parameters are needed and the url is returned untouched.
 - Otherwise any "login@" prefix is removed from the url and passed to
 - the shell command as "-l login" instead.
 -
 - The shell command comes from remoteAnnexRsyncTransport, defaulting
 - to ssh when that is empty. Only ssh and rsh are supported; anything
 - else is an error.
 -}
rsyncTransport :: RemoteGitConfig -> RsyncUrl -> Annex ([CommandParam], RsyncUrl)
rsyncTransport gc rawurl
    | not (rsyncUrlIsShell rawurl) = return ([], rawurl)
    | otherwise = do
        rsh <- shellcommand
        return (rsyncShell rsh, resturl)
  where
    -- When nothing follows the separator, the whole url is treated
    -- as the host part and there is no login.
    (login, resturl) = case separate (== '@') rawurl of
        (h, "") -> (Nothing, h)
        (l, h) -> (Just l, h)

    loginopt = case login of
        Nothing -> []
        Just l -> ["-l", l]

    configured = case remoteAnnexRsyncTransport gc of
        [] -> ["ssh"]
        xs -> xs

    shellcommand = case configured of
        "ssh":sshopts -> do
            let (port, sshopts') = sshReadPort sshopts
            let host = takeWhile (/= ':') resturl
            -- Connection caching
            cached <- sshCachingOptions (host, port)
                (map Param (loginopt ++ sshopts'))
            return (Param "ssh" : cached)
        "rsh":rshopts ->
            return $ map Param ("rsh" : loginopt ++ rshopts)
        unknown ->
            error $ "Unknown Rsync transport: " ++ unwords unknown
{- Sets up a new rsync special remote (or re-enables an existing one,
 - when a UUID is supplied).
 -
 - Fails with "Specify rsyncurl=" when the config has no rsyncurl.
 - Returns the config as updated by encryptionSetup, and the remote's
 - UUID.
 -}
rsyncSetup :: Maybe UUID -> RemoteConfig -> Annex (RemoteConfig, UUID)
rsyncSetup mu c = do
    -- verify configuration is sane
    -- (This is checked first, and strictly, so that a missing url is
    -- reported before a UUID is generated or encryption is set up,
    -- rather than only when the url is finally used.)
    url <- maybe (error "Specify rsyncurl=") return $
        M.lookup "rsyncurl" c
    u <- maybe (liftIO genUUID) return mu
    c' <- encryptionSetup c
    -- The rsyncurl is stored in git config, not only in this remote's
    -- persistent state, so it can vary between hosts.
    gitConfigSpecialRemote u c' "rsyncurl" url
    return (c', u)
{- Shell escapes a string, but only when escaping is enabled for the
 - remote and its url is one that rsyncUrlIsShell recognises.
 - Otherwise the string is returned unchanged. -}
rsyncEscape :: RsyncOpts -> String -> String
rsyncEscape o s =
    if rsyncShellEscape o && rsyncUrlIsShell (rsyncUrl o)
        then shellEscape s
        else s
{- All the urls a key's content may be found at on the remote: one for
 - each hashing scheme in annexHashes. Below the hash directory, the
 - content is a file named after the key, inside a directory that is
 - also named after the key; that last part is escaped with
 - rsyncEscape. -}
rsyncUrls :: RsyncOpts -> Key -> [String]
rsyncUrls o k =
    [ rsyncUrl o </> hasher k </> rsyncEscape o (keyname </> keyname)
    | hasher <- annexHashes
    ]
  where
    keyname = keyFile k
{- Stores a key's content on the remote, unencrypted.
 -
 - The transfer is run by sendAnnex, which is also given an action that
 - removes the key from the remote (presumably run to roll back when
 - the send goes wrong -- confirm against sendAnnex). The associated
 - file is not used. -}
store :: RsyncOpts -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
store o k _f p = sendAnnex k rollback send
  where
    rollback = void (remove o k)
    -- The annex's own file must not be moved away, so it is
    -- linked or copied into the scratch directory, not renamed.
    send = rsyncSend o p k False
{- Stores a key's content on the remote, encrypted.
 -
 - The content is encrypted into a temp file (obtained from withTmp for
 - the encrypted key), and that file is then sent under the encrypted
 - key. As the temp file is only an intermediate copy, rsyncSend is
 - allowed to rename it into its scratch directory. -}
storeEncrypted :: RsyncOpts -> [CommandParam] -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
storeEncrypted o gpgOpts (cipher, enck) k p = withTmp enck viaTmp
  where
    viaTmp tmp = sendAnnex k (void (remove o enck)) (encryptAndSend tmp)

    encryptAndSend tmp src = do
        liftIO $ encrypt gpgOpts cipher (feedFile src)
            (readBytes (L.writeFile tmp))
        rsyncSend o p enck True tmp
{- Retrieves a key's unencrypted content from the remote into a file,
 - reporting progress to the meter. The associated file is not used. -}
retrieve :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
retrieve o k _ f = rsyncRetrieve o k f . Just
{- Cheap retrieval: only attempted when preseedTmp succeeds for the
 - destination file, in which case rsync is run without any progress
 - meter. Otherwise fails without contacting the remote. -}
retrieveCheap :: RsyncOpts -> Key -> FilePath -> Annex Bool
retrieveCheap o k f = do
    preseeded <- preseedTmp k f
    if preseeded
        then rsyncRetrieve o k f Nothing
        else return False
{- Retrieves a key's encrypted content from the remote and decrypts it.
 -
 - The encrypted content is first downloaded to a temp file (obtained
 - from withTmp for the encrypted key), and then decrypted into the
 - destination file. Failure of either step yields False. -}
retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool
retrieveEncrypted o (cipher, enck) _ f p = withTmp enck $ \tmp -> do
    downloaded <- rsyncRetrieve o enck tmp (Just p)
    if downloaded
        then liftIO $ catchBoolIO $ do
            decrypt cipher (feedFile tmp) (readBytes (L.writeFile f))
            return True
        else return False
{- Removes a key's content from the remote.
 -
 - rsync has no delete command of its own, so an empty directory is
 - synced over the remote with --delete, using include/exclude rules
 - that limit rsync to the places the key's content could be. -}
remove :: RsyncOpts -> Key -> Annex Bool
remove o k = do
    ps <- sendParams
    withRsyncScratchDir $ \tmp -> liftIO $ do
        {- Send an empty directory to rsync to make it delete. -}
        let emptydir = tmp </> keyFile k
        createDirectoryIfMissing True emptydir
        rsync $ concat
            [ rsyncOptions o
            , ps
            , [ Param ("--include=" ++ i) | i <- includes ]
            , [ Param "--exclude=*" -- exclude everything else
              , Params "--quiet --delete --recursive"
              , partialParams
              , Param (addTrailingPathSeparator emptydir)
              , Param (rsyncUrl o)
              ]
            ]
  where
    {- Include rules matching the directories where the content
     - could be, for every hashing scheme. The parent directories
     - have to be included explicitly too, due to how rsync
     - traverses directories. -}
    includes = concat
        [ [ parentDir dir
          , dir
          -- match content directory and anything in it
          , dir </> keyFile k </> "***"
          ]
        | hasher <- annexHashes
        , let dir = hasher k
        ]
{- Checks if a key's content is present on the remote, by running
 - rsync quietly on each of the key's possible urls until one of the
 - runs succeeds.
 -
 - The result is always a Right: this does not currently differentiate
 - between rsync failing to connect, and the file not being present. -}
checkPresent :: Git.Repo -> RsyncOpts -> Key -> Annex (Either String Bool)
checkPresent r o k = do
    showAction ("checking " ++ Git.repoDescribe r)
    found <- untilTrue (rsyncUrls o k) probe
    return (Right found)
  where
    -- Any failure to run rsync on the url counts as "not present".
    probe u = liftIO $ catchBoolIO $ do
        withQuietOutput createProcessSuccess (rsynccmd u)
        return True

    rsynccmd u = proc "rsync" (toCommand (rsyncOptions o ++ [Param u]))
{- Rsync parameters that make sending files resumable in a safe way:
 - partially transferred files are kept, but in a separate
 - .rsync-partial directory, so a file is only moved into place once
 - it is complete. -}
partialParams :: CommandParam
partialParams = Params $ unwords
    [ "--partial"
    , "--partial-dir=.rsync-partial"
    ]
{- Extra rsync parameters to use when sending files.
 -
 - On a crippled filesystem the local permissions can be all messed
 - up, so rsync is told to use the destination's default permissions
 - instead. Otherwise, nothing extra is needed. -}
sendParams :: Annex [CommandParam]
sendParams = do
    crippled <- crippledFileSystem
    return $ if crippled
        then [rsyncUseDestinationPermissions]
        else []
{- Runs an action, passing it a freshly emptied scratch directory in
 - which trees can be built up for rsync.
 -
 - The directory lives under the annex tmp dir, in
 - rsynctmp/<unique id>, where the unique id is the process id (or, on
 - Windows, a random number). It is deleted again once the action has
 - finished. -}
withRsyncScratchDir :: (FilePath -> Annex Bool) -> Annex Bool
withRsyncScratchDir a = do
    uniq <- liftIO uniqueid
    tmpbase <- fromRepo gitAnnexTmpDir
    createAnnexDirectory tmpbase
    let scratch = tmpbase </> "rsynctmp" </> uniq
    -- Get rid of anything left behind in the same location earlier.
    nuke scratch
    liftIO $ createDirectoryIfMissing True scratch
    nuke scratch `after` a scratch
  where
#ifndef mingw32_HOST_OS
    uniqueid = show <$> getProcessID
#else
    uniqueid = show <$> (getStdRandom random :: IO Int)
#endif
    nuke d = liftIO $
        whenM (doesDirectoryExist d) (removeDirectoryRecursive d)
{- Downloads a key's content from the remote to a file, trying each of
 - the key's possible urls in turn until one of them works. -}
rsyncRetrieve :: RsyncOpts -> Key -> FilePath -> Maybe MeterUpdate -> Annex Bool
rsyncRetrieve o k dest callback = untilTrue (rsyncUrls o k) fetch
  where
    fetch u = rsyncRemote o callback
        -- use inplace when retrieving to support resuming
        [ Param "--inplace"
        , Param u
        , File dest
        ]
{- Runs rsync with the remote's options, --progress, and the given
 - parameters.
 -
 - When a meter callback is supplied, rsync is run via rsyncProgress
 - with it; otherwise plain rsync is used. On failure, a note is shown
 - telling the user how to resume the transfer. -}
rsyncRemote :: RsyncOpts -> (Maybe MeterUpdate) -> [CommandParam] -> Annex Bool
rsyncRemote o callback params = do
    showOutput -- make way for progress bar
    ok <- liftIO (runrsync allparams)
    if ok
        then return True
        else do
            showLongNote "rsync failed -- run git annex again to resume file transfer"
            return False
  where
    runrsync = case callback of
        Nothing -> rsync
        Just meter -> rsyncProgress meter
    allparams = rsyncOptions o ++ Params "--progress" : params
{- To send a single key is slightly tricky; need to build up a temporary
 - directory structure to pass to rsync so it can create the hash
 - directories.
 -
 - This would not be necessary if the hash directory structure used locally
 - was always the same as that used on the rsync remote. So if that's ever
 - unified, this gets nicer.
 - (When we have the right hash directory structure, we can just
 - pass --include=X --include=X/Y --include=X/Y/file --exclude=*)
 -
 - The source file is put into the scratch directory at the first of
 - the key's keyPaths: renamed there when canrename is set, and
 - otherwise hard linked or copied. The contents of the scratch
 - directory are then sent recursively to the remote's url.
 -
 - Returns False if the file could not be staged or rsync failed, or
 - if the key has no paths at all (rather than crashing on an empty
 - list).
 -}
rsyncSend :: RsyncOpts -> MeterUpdate -> Key -> Bool -> FilePath -> Annex Bool
rsyncSend o callback k canrename src = withRsyncScratchDir $ \tmp ->
    case keyPaths k of
        [] -> return False
        (keypath:_) -> do
            let dest = tmp </> keypath
            liftIO $ createDirectoryIfMissing True $ parentDir dest
            staged <- liftIO $ if canrename
                then do
                    renameFile src dest
                    return True
                else createLinkOrCopy src dest
            if staged
                then do
                    ps <- sendParams
                    rsyncRemote o (Just callback) $ ps ++
                        [ Param "--recursive"
                        , partialParams
                        -- tmp/ to send contents of tmp dir
                        , File $ addTrailingPathSeparator tmp
                        , Param $ rsyncUrl o
                        ]
                else return False