rsync special remote
Fully tested and working, including resuming and encryption. (Though not resuming when sending *with* encryption; gpg doesn't produce identical output each time.) Uses same layout as the directory special remote and the .git/annex/objects/ directory.
This commit is contained in:
parent
4381ac062f
commit
e68f128a9b
11 changed files with 265 additions and 19 deletions
12
Content.hs
12
Content.hs
|
@ -12,6 +12,7 @@ module Content (
|
||||||
logStatusFor,
|
logStatusFor,
|
||||||
getViaTmp,
|
getViaTmp,
|
||||||
getViaTmpUnchecked,
|
getViaTmpUnchecked,
|
||||||
|
withTmp,
|
||||||
checkDiskSpace,
|
checkDiskSpace,
|
||||||
preventWrite,
|
preventWrite,
|
||||||
allowWrite,
|
allowWrite,
|
||||||
|
@ -127,6 +128,17 @@ getViaTmpUnchecked key action = do
|
||||||
-- to resume its transfer
|
-- to resume its transfer
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
{- Creates a temp file, runs an action on it, and cleans up the temp file. -}
|
||||||
|
withTmp :: Key -> (FilePath -> Annex a) -> Annex a
|
||||||
|
withTmp key action = do
|
||||||
|
g <- Annex.gitRepo
|
||||||
|
let tmp = gitAnnexTmpLocation g key
|
||||||
|
liftIO $ createDirectoryIfMissing True (parentDir tmp)
|
||||||
|
res <- action tmp
|
||||||
|
tmp_exists <- liftIO $ doesFileExist tmp
|
||||||
|
when tmp_exists $ liftIO $ removeFile tmp
|
||||||
|
return res
|
||||||
|
|
||||||
{- Checks that there is disk space available to store a given key,
|
{- Checks that there is disk space available to store a given key,
|
||||||
- throwing an error if not. -}
|
- throwing an error if not. -}
|
||||||
checkDiskSpace :: Key -> Annex ()
|
checkDiskSpace :: Key -> Annex ()
|
||||||
|
|
|
@ -48,6 +48,7 @@ import qualified Remote.Git
|
||||||
import qualified Remote.S3
|
import qualified Remote.S3
|
||||||
import qualified Remote.Bup
|
import qualified Remote.Bup
|
||||||
import qualified Remote.Directory
|
import qualified Remote.Directory
|
||||||
|
import qualified Remote.Rsync
|
||||||
|
|
||||||
remoteTypes :: [RemoteType Annex]
|
remoteTypes :: [RemoteType Annex]
|
||||||
remoteTypes =
|
remoteTypes =
|
||||||
|
@ -55,6 +56,7 @@ remoteTypes =
|
||||||
, Remote.S3.remote
|
, Remote.S3.remote
|
||||||
, Remote.Bup.remote
|
, Remote.Bup.remote
|
||||||
, Remote.Directory.remote
|
, Remote.Directory.remote
|
||||||
|
, Remote.Rsync.remote
|
||||||
]
|
]
|
||||||
|
|
||||||
{- Builds a list of all available Remotes.
|
{- Builds a list of all available Remotes.
|
||||||
|
|
|
@ -158,7 +158,7 @@ rsynchelper :: Git.Repo -> Bool -> Key -> FilePath -> Annex (Bool)
|
||||||
rsynchelper r sending key file = do
|
rsynchelper r sending key file = do
|
||||||
showProgress -- make way for progress bar
|
showProgress -- make way for progress bar
|
||||||
p <- rsyncParams r sending key file
|
p <- rsyncParams r sending key file
|
||||||
res <- liftIO $ boolSystem "rsync" p
|
res <- liftIO $ rsync p
|
||||||
if res
|
if res
|
||||||
then return res
|
then return res
|
||||||
else do
|
else do
|
||||||
|
|
193
Remote/Rsync.hs
Normal file
193
Remote/Rsync.hs
Normal file
|
@ -0,0 +1,193 @@
|
||||||
|
{- A remote that is only accessible by rsync.
|
||||||
|
-
|
||||||
|
- Copyright 2011 Joey Hess <joey@kitenet.net>
|
||||||
|
-
|
||||||
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Remote.Rsync (remote) where
|
||||||
|
|
||||||
|
import qualified Data.ByteString.Lazy.Char8 as L
|
||||||
|
import Control.Exception.Extensible (IOException)
|
||||||
|
import qualified Data.Map as M
|
||||||
|
import Control.Monad.State (liftIO, when)
|
||||||
|
import System.FilePath
|
||||||
|
import System.Directory
|
||||||
|
import System.Posix.Files
|
||||||
|
import System.Posix.Process
|
||||||
|
|
||||||
|
import RemoteClass
|
||||||
|
import Types
|
||||||
|
import qualified GitRepo as Git
|
||||||
|
import qualified Annex
|
||||||
|
import UUID
|
||||||
|
import Locations
|
||||||
|
import Config
|
||||||
|
import Content
|
||||||
|
import Utility
|
||||||
|
import Remote.Special
|
||||||
|
import Remote.Encryptable
|
||||||
|
import Crypto
|
||||||
|
import Messages
|
||||||
|
import RsyncFile
|
||||||
|
|
||||||
|
type RsyncUrl = String
|
||||||
|
|
||||||
|
data RsyncOpts = RsyncOpts {
|
||||||
|
rsyncUrl :: RsyncUrl,
|
||||||
|
rsyncOptions :: [CommandParam]
|
||||||
|
}
|
||||||
|
|
||||||
|
remote :: RemoteType Annex
|
||||||
|
remote = RemoteType {
|
||||||
|
typename = "rsync",
|
||||||
|
enumerate = findSpecialRemotes "rsyncurl",
|
||||||
|
generate = gen,
|
||||||
|
setup = rsyncSetup
|
||||||
|
}
|
||||||
|
|
||||||
|
gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex (Remote Annex)
|
||||||
|
gen r u c = do
|
||||||
|
url <- getConfig r "rsyncurl" (error "missing rsyncurl")
|
||||||
|
opts <- getConfig r "rsync-options" ""
|
||||||
|
let o = RsyncOpts url $ map Param $ words opts
|
||||||
|
cst <- remoteCost r expensiveRemoteCost
|
||||||
|
return $ encryptableRemote c
|
||||||
|
(storeEncrypted o)
|
||||||
|
(retrieveEncrypted o)
|
||||||
|
Remote {
|
||||||
|
uuid = u,
|
||||||
|
cost = cst,
|
||||||
|
name = Git.repoDescribe r,
|
||||||
|
storeKey = store o,
|
||||||
|
retrieveKeyFile = retrieve o,
|
||||||
|
removeKey = remove o,
|
||||||
|
hasKey = checkPresent r o,
|
||||||
|
hasKeyCheap = True,
|
||||||
|
config = Nothing
|
||||||
|
}
|
||||||
|
|
||||||
|
rsyncSetup :: UUID -> RemoteConfig -> Annex RemoteConfig
|
||||||
|
rsyncSetup u c = do
|
||||||
|
-- verify configuration is sane
|
||||||
|
let url = case M.lookup "rsyncurl" c of
|
||||||
|
Nothing -> error "Specify rsyncurl="
|
||||||
|
Just d -> d
|
||||||
|
c' <- encryptionSetup c
|
||||||
|
|
||||||
|
-- The rsyncurl is stored in git config, not only in this remote's
|
||||||
|
-- persistant state, so it can vary between hosts.
|
||||||
|
gitConfigSpecialRemote u c' "rsyncurl" url
|
||||||
|
return c'
|
||||||
|
|
||||||
|
rsyncKey :: RsyncOpts -> Key -> String
|
||||||
|
rsyncKey o k = rsyncUrl o </> hashDirMixed k </> f </> f
|
||||||
|
where
|
||||||
|
f = keyFile k
|
||||||
|
|
||||||
|
store :: RsyncOpts -> Key -> Annex Bool
|
||||||
|
store o k = do
|
||||||
|
g <- Annex.gitRepo
|
||||||
|
rsyncSend o k (gitAnnexLocation g k)
|
||||||
|
|
||||||
|
storeEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> Annex Bool
|
||||||
|
storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do
|
||||||
|
g <- Annex.gitRepo
|
||||||
|
let f = gitAnnexLocation g k
|
||||||
|
liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s
|
||||||
|
rsyncSend o enck tmp
|
||||||
|
|
||||||
|
retrieve :: RsyncOpts -> Key -> FilePath -> Annex Bool
|
||||||
|
retrieve o k f = rsyncRemote o
|
||||||
|
-- use inplace when retrieving to support resuming
|
||||||
|
[ Param "--inplace"
|
||||||
|
, Param $ rsyncKey o k
|
||||||
|
, Param f
|
||||||
|
]
|
||||||
|
|
||||||
|
retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> FilePath -> Annex Bool
|
||||||
|
retrieveEncrypted o (cipher, enck) f = withTmp enck $ \tmp -> do
|
||||||
|
res <- retrieve o enck tmp
|
||||||
|
if res
|
||||||
|
then liftIO $ catchBool $ do
|
||||||
|
withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f
|
||||||
|
return True
|
||||||
|
else return res
|
||||||
|
|
||||||
|
remove :: RsyncOpts -> Key -> Annex Bool
|
||||||
|
remove o k = withRsyncScratchDir $ \tmp -> do
|
||||||
|
{- Send an empty directory to rysnc as the parent directory
|
||||||
|
- of the file to remove. -}
|
||||||
|
let dummy = tmp </> keyFile k
|
||||||
|
liftIO $ createDirectoryIfMissing True dummy
|
||||||
|
liftIO $ rsync $ rsyncOptions o ++
|
||||||
|
[ Params "--delete --recursive"
|
||||||
|
, partialParams
|
||||||
|
, Param $ addTrailingPathSeparator dummy
|
||||||
|
, Param $ parentDir $ rsyncKey o k
|
||||||
|
]
|
||||||
|
|
||||||
|
checkPresent :: Git.Repo -> RsyncOpts -> Key -> Annex (Either IOException Bool)
|
||||||
|
checkPresent r o k = do
|
||||||
|
showNote ("checking " ++ Git.repoDescribe r ++ "...")
|
||||||
|
-- note: Does not currently differnetiate between rsync failing
|
||||||
|
-- to connect, and the file not being present.
|
||||||
|
res <- liftIO $ boolSystem "sh" [Param "-c", Param cmd]
|
||||||
|
return $ Right res
|
||||||
|
where
|
||||||
|
cmd = "rsync --quiet " ++ testfile ++ " 2>/dev/null"
|
||||||
|
testfile = shellEscape $ rsyncKey o k
|
||||||
|
|
||||||
|
{- Rsync params to enable resumes of sending files safely,
|
||||||
|
- ensure that files are only moved into place once complete
|
||||||
|
-}
|
||||||
|
partialParams :: CommandParam
|
||||||
|
partialParams = Params "--no-inplace --partial --partial-dir=.rsync-partial"
|
||||||
|
|
||||||
|
{- Runs an action in an empty scratch directory that can be used to build
|
||||||
|
- up trees for rsync. -}
|
||||||
|
withRsyncScratchDir :: (FilePath -> Annex Bool) -> Annex Bool
|
||||||
|
withRsyncScratchDir a = do
|
||||||
|
g <- Annex.gitRepo
|
||||||
|
pid <- liftIO $ getProcessID
|
||||||
|
let tmp = gitAnnexTmpDir g </> "rsynctmp" </> show pid
|
||||||
|
nuke tmp
|
||||||
|
liftIO $ createDirectoryIfMissing True $ tmp
|
||||||
|
res <- a tmp
|
||||||
|
nuke tmp
|
||||||
|
return res
|
||||||
|
where
|
||||||
|
nuke d = liftIO $ do
|
||||||
|
e <- doesDirectoryExist d
|
||||||
|
when e $ liftIO $ removeDirectoryRecursive d
|
||||||
|
|
||||||
|
rsyncRemote :: RsyncOpts -> [CommandParam] -> Annex Bool
|
||||||
|
rsyncRemote o params = do
|
||||||
|
showProgress -- make way for progress bar
|
||||||
|
res <- liftIO $ rsync $ rsyncOptions o ++ defaultParams ++ params
|
||||||
|
if res
|
||||||
|
then return res
|
||||||
|
else do
|
||||||
|
showLongNote "rsync failed -- run git annex again to resume file transfer"
|
||||||
|
return res
|
||||||
|
where
|
||||||
|
defaultParams = [Params "--progress"]
|
||||||
|
|
||||||
|
{- To send a single key is slightly tricky; need to build up a temporary
|
||||||
|
directory structure to pass to rsync so it can create the hash
|
||||||
|
directories. -}
|
||||||
|
rsyncSend :: RsyncOpts -> Key -> FilePath -> Annex Bool
|
||||||
|
rsyncSend o k src = withRsyncScratchDir $ \tmp -> do
|
||||||
|
let dest = tmp </> hashDirMixed k </> f </> f
|
||||||
|
liftIO $ createDirectoryIfMissing True $ parentDir $ dest
|
||||||
|
liftIO $ createLink src dest
|
||||||
|
res <- rsyncRemote o
|
||||||
|
[ Param "--recursive"
|
||||||
|
, partialParams
|
||||||
|
-- tmp/ to send contents of tmp dir
|
||||||
|
, Param $ addTrailingPathSeparator tmp
|
||||||
|
, Param $ rsyncUrl o
|
||||||
|
]
|
||||||
|
return res
|
||||||
|
where
|
||||||
|
f = keyFile k
|
|
@ -19,7 +19,6 @@ import Control.Monad (when)
|
||||||
import Control.Monad.State (liftIO)
|
import Control.Monad.State (liftIO)
|
||||||
import System.Environment
|
import System.Environment
|
||||||
import System.Posix.Files
|
import System.Posix.Files
|
||||||
import System.Directory
|
|
||||||
|
|
||||||
import RemoteClass
|
import RemoteClass
|
||||||
import Types
|
import Types
|
||||||
|
@ -33,7 +32,7 @@ import Remote.Special
|
||||||
import Remote.Encryptable
|
import Remote.Encryptable
|
||||||
import Crypto
|
import Crypto
|
||||||
import Key
|
import Key
|
||||||
import Utility
|
import Content
|
||||||
|
|
||||||
remote :: RemoteType Annex
|
remote :: RemoteType Annex
|
||||||
remote = RemoteType {
|
remote = RemoteType {
|
||||||
|
@ -108,18 +107,15 @@ store r k = s3Action r False $ \(conn, bucket) -> do
|
||||||
s3Bool res
|
s3Bool res
|
||||||
|
|
||||||
storeEncrypted :: Remote Annex -> (Cipher, Key) -> Key -> Annex Bool
|
storeEncrypted :: Remote Annex -> (Cipher, Key) -> Key -> Annex Bool
|
||||||
storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) -> do
|
storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) ->
|
||||||
g <- Annex.gitRepo
|
|
||||||
let f = gitAnnexLocation g k
|
|
||||||
-- To get file size of the encrypted content, have to use a temp file.
|
-- To get file size of the encrypted content, have to use a temp file.
|
||||||
-- (An alternative would be chunking to to a constant size.)
|
-- (An alternative would be chunking to to a constant size.)
|
||||||
let tmp = gitAnnexTmpLocation g enck
|
withTmp enck $ \tmp -> do
|
||||||
liftIO $ createDirectoryIfMissing True (parentDir tmp)
|
g <- Annex.gitRepo
|
||||||
liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s
|
let f = gitAnnexLocation g k
|
||||||
res <- liftIO $ storeHelper (conn, bucket) r enck tmp
|
liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s
|
||||||
tmp_exists <- liftIO $ doesFileExist tmp
|
res <- liftIO $ storeHelper (conn, bucket) r enck tmp
|
||||||
when tmp_exists $ liftIO $ removeFile tmp
|
s3Bool res
|
||||||
s3Bool res
|
|
||||||
|
|
||||||
storeHelper :: (AWSConnection, String) -> Remote Annex -> Key -> FilePath -> IO (AWSResult ())
|
storeHelper :: (AWSConnection, String) -> Remote Annex -> Key -> FilePath -> IO (AWSResult ())
|
||||||
storeHelper (conn, bucket) r k file = do
|
storeHelper (conn, bucket) r k file = do
|
||||||
|
|
|
@ -95,8 +95,9 @@ boolSystem command params = do
|
||||||
restoresignals oldint oldset
|
restoresignals oldint oldset
|
||||||
executeFile command True (toCommand params) Nothing
|
executeFile command True (toCommand params) Nothing
|
||||||
|
|
||||||
{- Escapes a filename to be safely able to be exposed to the shell. -}
|
{- Escapes a filename or other parameter to be safely able to be exposed to
|
||||||
shellEscape :: FilePath -> String
|
- the shell. -}
|
||||||
|
shellEscape :: String -> String
|
||||||
shellEscape f = "'" ++ escaped ++ "'"
|
shellEscape f = "'" ++ escaped ++ "'"
|
||||||
where
|
where
|
||||||
-- replace ' with '"'"'
|
-- replace ' with '"'"'
|
||||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -2,6 +2,8 @@ git-annex (0.20110426) UNRELEASED; urgency=low
|
||||||
|
|
||||||
* Switch back to haskell SHA library, so git-annex remains buildable on
|
* Switch back to haskell SHA library, so git-annex remains buildable on
|
||||||
Debian stable.
|
Debian stable.
|
||||||
|
* Added rsync special remotes. This could be used, for example, to
|
||||||
|
store annexed content on rsync.net, encrypted naturally. Or anywhere else.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Tue, 26 Apr 2011 11:23:54 -0400
|
-- Joey Hess <joeyh@debian.org> Tue, 26 Apr 2011 11:23:54 -0400
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,21 @@
|
||||||
i think it would be useful to have a fourth kind of [[special remote]]s that connects to a dumb storage using sftp or rsync. this can be emulated by using sshfs, but that means lots of round-trips through the system and is limited to platforms where sshfs is available.
|
i think it would be useful to have a fourth kind of [[special remote]]s
|
||||||
|
that connects to a dumb storage using sftp or rsync. this can be emulated
|
||||||
|
by using sshfs, but that means lots of round-trips through the system and
|
||||||
|
is limited to platforms where sshfs is available.
|
||||||
|
|
||||||
typical use cases are backups to storate shared between a group of people where each user only has limited access (sftp or rsync), when using [[bup]] is not an option.
|
typical use cases are backups to storate shared between a group of people
|
||||||
|
where each user only has limited access (sftp or rsync), when using [[bup]]
|
||||||
|
is not an option.
|
||||||
|
|
||||||
an alternative to implementing yet another special remote would be to have some kind of plugin system by which external programs can provide an interface to key-value stores (i'd implement the sftp backend myself, but haven't learned haskell yet).
|
an alternative to implementing yet another special remote would be to have
|
||||||
|
some kind of plugin system by which external programs can provide an
|
||||||
|
interface to key-value stores (i'd implement the sftp backend myself, but
|
||||||
|
haven't learned haskell yet).
|
||||||
|
|
||||||
|
> Ask and ye [[shall receive|special_remotes/rsync]].
|
||||||
|
>
|
||||||
|
> Sometimes I almost think that a generic configurable special remote that
|
||||||
|
> just uses configured shell commands would be useful.. But there's really
|
||||||
|
> no comparison with sitting down and writing code tuned to work with
|
||||||
|
> a given transport like rsync, when it comes to reliability and taking
|
||||||
|
> advantage of its abilities (like resuming). --[[Joey]]
|
||||||
|
|
|
@ -9,6 +9,7 @@ They cannot be used by other git commands though.
|
||||||
* [[Amazon_S3]]
|
* [[Amazon_S3]]
|
||||||
* [[bup]]
|
* [[bup]]
|
||||||
* [[directory]]
|
* [[directory]]
|
||||||
|
* [[rsync]]
|
||||||
|
|
||||||
## Unused content on special remotes
|
## Unused content on special remotes
|
||||||
|
|
||||||
|
|
|
@ -7,4 +7,4 @@ the drive's mountpoint as a directory remote.
|
||||||
|
|
||||||
Setup example:
|
Setup example:
|
||||||
|
|
||||||
# git annex initremote usbdrive directory=/media/usbdrive/ encryption=none
|
# git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none
|
||||||
|
|
23
doc/special_remotes/rsync.mdwn
Normal file
23
doc/special_remotes/rsync.mdwn
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
This special remote type rsyncs file contents to somewhere else.
|
||||||
|
|
||||||
|
Setup example:
|
||||||
|
|
||||||
|
# git annex initremote myrsync type=rsync rsyncurl=rsync://rsync.example.com/myrsync encryption=joey@kitenet.net
|
||||||
|
|
||||||
|
## configuration
|
||||||
|
|
||||||
|
These parameters can be passed to `git annex initremote` to configure rsync:
|
||||||
|
|
||||||
|
* `encryption` - Required. Either "none" to disable encryption of content
|
||||||
|
stored in rsync,
|
||||||
|
or a value that can be looked up (using gpg -k) to find a gpg encryption
|
||||||
|
key that will be given access to the remote. Note that additional gpg
|
||||||
|
keys can be given access to a remote by rerunning initremote with
|
||||||
|
the new key id. See [[encryption]].
|
||||||
|
|
||||||
|
* `rsyncurl` - Required. This is the url or `hostname:/directory` to
|
||||||
|
pass to rsync to tell it where to store content.
|
||||||
|
|
||||||
|
The `annex-rsync-options` git configuration setting can be used to pass
|
||||||
|
parameters to rsync. Note that it is **not safe** to put "--delete"
|
||||||
|
in `annex-rsync-options` when using rsync special remotes.
|
Loading…
Add table
Reference in a new issue