rsync special remote

Fully tested and working, including resuming and encryption. (Though not
resuming when sending *with* encryption; gpg doesn't produce identical
output each time.)

Uses same layout as the directory special remote and the .git/annex/objects/
directory.
This commit is contained in:
Joey Hess 2011-04-27 20:06:07 -04:00
parent 4381ac062f
commit e68f128a9b
11 changed files with 265 additions and 19 deletions

View file

@ -12,6 +12,7 @@ module Content (
logStatusFor,
getViaTmp,
getViaTmpUnchecked,
withTmp,
checkDiskSpace,
preventWrite,
allowWrite,
@ -127,6 +128,17 @@ getViaTmpUnchecked key action = do
-- to resume its transfer
return False
{- Creates a temp file, runs an action on it, and cleans up the temp file. -}
withTmp :: Key -> (FilePath -> Annex a) -> Annex a
withTmp key action = do
g <- Annex.gitRepo
let tmp = gitAnnexTmpLocation g key
liftIO $ createDirectoryIfMissing True (parentDir tmp)
res <- action tmp
tmp_exists <- liftIO $ doesFileExist tmp
when tmp_exists $ liftIO $ removeFile tmp
return res
{- Checks that there is disk space available to store a given key,
- throwing an error if not. -}
checkDiskSpace :: Key -> Annex ()

View file

@ -48,6 +48,7 @@ import qualified Remote.Git
import qualified Remote.S3
import qualified Remote.Bup
import qualified Remote.Directory
import qualified Remote.Rsync
remoteTypes :: [RemoteType Annex]
remoteTypes =
@ -55,6 +56,7 @@ remoteTypes =
, Remote.S3.remote
, Remote.Bup.remote
, Remote.Directory.remote
, Remote.Rsync.remote
]
{- Builds a list of all available Remotes.

View file

@ -158,7 +158,7 @@ rsynchelper :: Git.Repo -> Bool -> Key -> FilePath -> Annex (Bool)
rsynchelper r sending key file = do
showProgress -- make way for progress bar
p <- rsyncParams r sending key file
res <- liftIO $ boolSystem "rsync" p
res <- liftIO $ rsync p
if res
then return res
else do

193
Remote/Rsync.hs Normal file
View file

@ -0,0 +1,193 @@
{- A remote that is only accessible by rsync.
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Remote.Rsync (remote) where
import qualified Data.ByteString.Lazy.Char8 as L
import Control.Exception.Extensible (IOException)
import qualified Data.Map as M
import Control.Monad.State (liftIO, when)
import System.FilePath
import System.Directory
import System.Posix.Files
import System.Posix.Process
import RemoteClass
import Types
import qualified GitRepo as Git
import qualified Annex
import UUID
import Locations
import Config
import Content
import Utility
import Remote.Special
import Remote.Encryptable
import Crypto
import Messages
import RsyncFile
type RsyncUrl = String
data RsyncOpts = RsyncOpts {
rsyncUrl :: RsyncUrl,
rsyncOptions :: [CommandParam]
}
remote :: RemoteType Annex
remote = RemoteType {
typename = "rsync",
enumerate = findSpecialRemotes "rsyncurl",
generate = gen,
setup = rsyncSetup
}
gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex (Remote Annex)
gen r u c = do
url <- getConfig r "rsyncurl" (error "missing rsyncurl")
opts <- getConfig r "rsync-options" ""
let o = RsyncOpts url $ map Param $ words opts
cst <- remoteCost r expensiveRemoteCost
return $ encryptableRemote c
(storeEncrypted o)
(retrieveEncrypted o)
Remote {
uuid = u,
cost = cst,
name = Git.repoDescribe r,
storeKey = store o,
retrieveKeyFile = retrieve o,
removeKey = remove o,
hasKey = checkPresent r o,
hasKeyCheap = True,
config = Nothing
}
rsyncSetup :: UUID -> RemoteConfig -> Annex RemoteConfig
rsyncSetup u c = do
-- verify configuration is sane
let url = case M.lookup "rsyncurl" c of
Nothing -> error "Specify rsyncurl="
Just d -> d
c' <- encryptionSetup c
-- The rsyncurl is stored in git config, not only in this remote's
-- persistant state, so it can vary between hosts.
gitConfigSpecialRemote u c' "rsyncurl" url
return c'
rsyncKey :: RsyncOpts -> Key -> String
rsyncKey o k = rsyncUrl o </> hashDirMixed k </> f </> f
where
f = keyFile k
store :: RsyncOpts -> Key -> Annex Bool
store o k = do
g <- Annex.gitRepo
rsyncSend o k (gitAnnexLocation g k)
storeEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> Annex Bool
storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do
g <- Annex.gitRepo
let f = gitAnnexLocation g k
liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s
rsyncSend o enck tmp
retrieve :: RsyncOpts -> Key -> FilePath -> Annex Bool
retrieve o k f = rsyncRemote o
-- use inplace when retrieving to support resuming
[ Param "--inplace"
, Param $ rsyncKey o k
, Param f
]
retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted o (cipher, enck) f = withTmp enck $ \tmp -> do
res <- retrieve o enck tmp
if res
then liftIO $ catchBool $ do
withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f
return True
else return res
remove :: RsyncOpts -> Key -> Annex Bool
remove o k = withRsyncScratchDir $ \tmp -> do
{- Send an empty directory to rysnc as the parent directory
- of the file to remove. -}
let dummy = tmp </> keyFile k
liftIO $ createDirectoryIfMissing True dummy
liftIO $ rsync $ rsyncOptions o ++
[ Params "--delete --recursive"
, partialParams
, Param $ addTrailingPathSeparator dummy
, Param $ parentDir $ rsyncKey o k
]
checkPresent :: Git.Repo -> RsyncOpts -> Key -> Annex (Either IOException Bool)
checkPresent r o k = do
showNote ("checking " ++ Git.repoDescribe r ++ "...")
-- note: Does not currently differnetiate between rsync failing
-- to connect, and the file not being present.
res <- liftIO $ boolSystem "sh" [Param "-c", Param cmd]
return $ Right res
where
cmd = "rsync --quiet " ++ testfile ++ " 2>/dev/null"
testfile = shellEscape $ rsyncKey o k
{- Rsync params to enable resumes of sending files safely,
- ensure that files are only moved into place once complete
-}
partialParams :: CommandParam
partialParams = Params "--no-inplace --partial --partial-dir=.rsync-partial"
{- Runs an action in an empty scratch directory that can be used to build
- up trees for rsync. -}
withRsyncScratchDir :: (FilePath -> Annex Bool) -> Annex Bool
withRsyncScratchDir a = do
g <- Annex.gitRepo
pid <- liftIO $ getProcessID
let tmp = gitAnnexTmpDir g </> "rsynctmp" </> show pid
nuke tmp
liftIO $ createDirectoryIfMissing True $ tmp
res <- a tmp
nuke tmp
return res
where
nuke d = liftIO $ do
e <- doesDirectoryExist d
when e $ liftIO $ removeDirectoryRecursive d
rsyncRemote :: RsyncOpts -> [CommandParam] -> Annex Bool
rsyncRemote o params = do
showProgress -- make way for progress bar
res <- liftIO $ rsync $ rsyncOptions o ++ defaultParams ++ params
if res
then return res
else do
showLongNote "rsync failed -- run git annex again to resume file transfer"
return res
where
defaultParams = [Params "--progress"]
{- To send a single key is slightly tricky; need to build up a temporary
directory structure to pass to rsync so it can create the hash
directories. -}
rsyncSend :: RsyncOpts -> Key -> FilePath -> Annex Bool
rsyncSend o k src = withRsyncScratchDir $ \tmp -> do
let dest = tmp </> hashDirMixed k </> f </> f
liftIO $ createDirectoryIfMissing True $ parentDir $ dest
liftIO $ createLink src dest
res <- rsyncRemote o
[ Param "--recursive"
, partialParams
-- tmp/ to send contents of tmp dir
, Param $ addTrailingPathSeparator tmp
, Param $ rsyncUrl o
]
return res
where
f = keyFile k

View file

@ -19,7 +19,6 @@ import Control.Monad (when)
import Control.Monad.State (liftIO)
import System.Environment
import System.Posix.Files
import System.Directory
import RemoteClass
import Types
@ -33,7 +32,7 @@ import Remote.Special
import Remote.Encryptable
import Crypto
import Key
import Utility
import Content
remote :: RemoteType Annex
remote = RemoteType {
@ -108,18 +107,15 @@ store r k = s3Action r False $ \(conn, bucket) -> do
s3Bool res
storeEncrypted :: Remote Annex -> (Cipher, Key) -> Key -> Annex Bool
storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) -> do
g <- Annex.gitRepo
let f = gitAnnexLocation g k
storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) ->
-- To get file size of the encrypted content, have to use a temp file.
-- (An alternative would be chunking to to a constant size.)
let tmp = gitAnnexTmpLocation g enck
liftIO $ createDirectoryIfMissing True (parentDir tmp)
liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s
res <- liftIO $ storeHelper (conn, bucket) r enck tmp
tmp_exists <- liftIO $ doesFileExist tmp
when tmp_exists $ liftIO $ removeFile tmp
s3Bool res
withTmp enck $ \tmp -> do
g <- Annex.gitRepo
let f = gitAnnexLocation g k
liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s
res <- liftIO $ storeHelper (conn, bucket) r enck tmp
s3Bool res
storeHelper :: (AWSConnection, String) -> Remote Annex -> Key -> FilePath -> IO (AWSResult ())
storeHelper (conn, bucket) r k file = do

View file

@ -95,8 +95,9 @@ boolSystem command params = do
restoresignals oldint oldset
executeFile command True (toCommand params) Nothing
{- Escapes a filename to be safely able to be exposed to the shell. -}
shellEscape :: FilePath -> String
{- Escapes a filename or other parameter to be safely able to be exposed to
- the shell. -}
shellEscape :: String -> String
shellEscape f = "'" ++ escaped ++ "'"
where
-- replace ' with '"'"'

2
debian/changelog vendored
View file

@ -2,6 +2,8 @@ git-annex (0.20110426) UNRELEASED; urgency=low
* Switch back to haskell SHA library, so git-annex remains buildable on
Debian stable.
* Added rsync special remotes. This could be used, for example, to
store annexed content on rsync.net, encrypted naturally. Or anywhere else.
-- Joey Hess <joeyh@debian.org> Tue, 26 Apr 2011 11:23:54 -0400

View file

@ -1,5 +1,21 @@
i think it would be useful to have a fourth kind of [[special remote]]s that connects to a dumb storage using sftp or rsync. this can be emulated by using sshfs, but that means lots of round-trips through the system and is limited to platforms where sshfs is available.
i think it would be useful to have a fourth kind of [[special remote]]s
that connects to a dumb storage using sftp or rsync. this can be emulated
by using sshfs, but that means lots of round-trips through the system and
is limited to platforms where sshfs is available.
typical use cases are backups to storate shared between a group of people where each user only has limited access (sftp or rsync), when using [[bup]] is not an option.
typical use cases are backups to storate shared between a group of people
where each user only has limited access (sftp or rsync), when using [[bup]]
is not an option.
an alternative to implementing yet another special remote would be to have some kind of plugin system by which external programs can provide an interface to key-value stores (i'd implement the sftp backend myself, but haven't learned haskell yet).
an alternative to implementing yet another special remote would be to have
some kind of plugin system by which external programs can provide an
interface to key-value stores (i'd implement the sftp backend myself, but
haven't learned haskell yet).
> Ask and ye [[shall receive|special_remotes/rsync]].
>
> Sometimes I almost think that a generic configurable special remote that
> just uses configured shell commands would be useful.. But there's really
> no comparison with sitting down and writing code tuned to work with
> a given transport like rsync, when it comes to reliability and taking
> advantage of its abilities (like resuming). --[[Joey]]

View file

@ -9,6 +9,7 @@ They cannot be used by other git commands though.
* [[Amazon_S3]]
* [[bup]]
* [[directory]]
* [[rsync]]
## Unused content on special remotes

View file

@ -7,4 +7,4 @@ the drive's mountpoint as a directory remote.
Setup example:
# git annex initremote usbdrive directory=/media/usbdrive/ encryption=none
# git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none

View file

@ -0,0 +1,23 @@
This special remote type rsyncs file contents to somewhere else.
Setup example:
# git annex initremote myrsync type=rsync rsyncurl=rsync://rsync.example.com/myrsync encryption=joey@kitenet.net
## configuration
These parameters can be passed to `git annex initremote` to configure rsync:
* `encryption` - Required. Either "none" to disable encryption of content
stored in rsync,
or a value that can be looked up (using gpg -k) to find a gpg encryption
key that will be given access to the remote. Note that additional gpg
keys can be given access to a remote by rerunning initremote with
the new key id. See [[encryption]].
* `rsyncurl` - Required. This is the url or `hostname:/directory` to
pass to rsync to tell it where to store content.
The `annex-rsync-options` git configuration setting can be used to pass
parameters to rsync. Note that it is **not safe** to put "--delete"
in `annex-rsync-options` when using rsync special remotes.