rsync special remote
Fully tested and working, including resuming and encryption. (Though not resuming when sending *with* encryption; gpg doesn't produce identical output each time.) Uses same layout as the directory special remote and the .git/annex/objects/ directory.
This commit is contained in:
parent
4381ac062f
commit
e68f128a9b
11 changed files with 265 additions and 19 deletions
12
Content.hs
12
Content.hs
|
@ -12,6 +12,7 @@ module Content (
|
|||
logStatusFor,
|
||||
getViaTmp,
|
||||
getViaTmpUnchecked,
|
||||
withTmp,
|
||||
checkDiskSpace,
|
||||
preventWrite,
|
||||
allowWrite,
|
||||
|
@ -127,6 +128,17 @@ getViaTmpUnchecked key action = do
|
|||
-- to resume its transfer
|
||||
return False
|
||||
|
||||
{- Creates a temp file, runs an action on it, and cleans up the temp file. -}
|
||||
withTmp :: Key -> (FilePath -> Annex a) -> Annex a
|
||||
withTmp key action = do
|
||||
g <- Annex.gitRepo
|
||||
let tmp = gitAnnexTmpLocation g key
|
||||
liftIO $ createDirectoryIfMissing True (parentDir tmp)
|
||||
res <- action tmp
|
||||
tmp_exists <- liftIO $ doesFileExist tmp
|
||||
when tmp_exists $ liftIO $ removeFile tmp
|
||||
return res
|
||||
|
||||
{- Checks that there is disk space available to store a given key,
|
||||
- throwing an error if not. -}
|
||||
checkDiskSpace :: Key -> Annex ()
|
||||
|
|
|
@ -48,6 +48,7 @@ import qualified Remote.Git
|
|||
import qualified Remote.S3
|
||||
import qualified Remote.Bup
|
||||
import qualified Remote.Directory
|
||||
import qualified Remote.Rsync
|
||||
|
||||
remoteTypes :: [RemoteType Annex]
|
||||
remoteTypes =
|
||||
|
@ -55,6 +56,7 @@ remoteTypes =
|
|||
, Remote.S3.remote
|
||||
, Remote.Bup.remote
|
||||
, Remote.Directory.remote
|
||||
, Remote.Rsync.remote
|
||||
]
|
||||
|
||||
{- Builds a list of all available Remotes.
|
||||
|
|
|
@ -158,7 +158,7 @@ rsynchelper :: Git.Repo -> Bool -> Key -> FilePath -> Annex (Bool)
|
|||
rsynchelper r sending key file = do
|
||||
showProgress -- make way for progress bar
|
||||
p <- rsyncParams r sending key file
|
||||
res <- liftIO $ boolSystem "rsync" p
|
||||
res <- liftIO $ rsync p
|
||||
if res
|
||||
then return res
|
||||
else do
|
||||
|
|
193
Remote/Rsync.hs
Normal file
193
Remote/Rsync.hs
Normal file
|
@ -0,0 +1,193 @@
|
|||
{- A remote that is only accessible by rsync.
|
||||
-
|
||||
- Copyright 2011 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Remote.Rsync (remote) where
|
||||
|
||||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
import Control.Exception.Extensible (IOException)
|
||||
import qualified Data.Map as M
|
||||
import Control.Monad.State (liftIO, when)
|
||||
import System.FilePath
|
||||
import System.Directory
|
||||
import System.Posix.Files
|
||||
import System.Posix.Process
|
||||
|
||||
import RemoteClass
|
||||
import Types
|
||||
import qualified GitRepo as Git
|
||||
import qualified Annex
|
||||
import UUID
|
||||
import Locations
|
||||
import Config
|
||||
import Content
|
||||
import Utility
|
||||
import Remote.Special
|
||||
import Remote.Encryptable
|
||||
import Crypto
|
||||
import Messages
|
||||
import RsyncFile
|
||||
|
||||
type RsyncUrl = String
|
||||
|
||||
data RsyncOpts = RsyncOpts {
|
||||
rsyncUrl :: RsyncUrl,
|
||||
rsyncOptions :: [CommandParam]
|
||||
}
|
||||
|
||||
remote :: RemoteType Annex
|
||||
remote = RemoteType {
|
||||
typename = "rsync",
|
||||
enumerate = findSpecialRemotes "rsyncurl",
|
||||
generate = gen,
|
||||
setup = rsyncSetup
|
||||
}
|
||||
|
||||
gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex (Remote Annex)
|
||||
gen r u c = do
|
||||
url <- getConfig r "rsyncurl" (error "missing rsyncurl")
|
||||
opts <- getConfig r "rsync-options" ""
|
||||
let o = RsyncOpts url $ map Param $ words opts
|
||||
cst <- remoteCost r expensiveRemoteCost
|
||||
return $ encryptableRemote c
|
||||
(storeEncrypted o)
|
||||
(retrieveEncrypted o)
|
||||
Remote {
|
||||
uuid = u,
|
||||
cost = cst,
|
||||
name = Git.repoDescribe r,
|
||||
storeKey = store o,
|
||||
retrieveKeyFile = retrieve o,
|
||||
removeKey = remove o,
|
||||
hasKey = checkPresent r o,
|
||||
hasKeyCheap = True,
|
||||
config = Nothing
|
||||
}
|
||||
|
||||
rsyncSetup :: UUID -> RemoteConfig -> Annex RemoteConfig
|
||||
rsyncSetup u c = do
|
||||
-- verify configuration is sane
|
||||
let url = case M.lookup "rsyncurl" c of
|
||||
Nothing -> error "Specify rsyncurl="
|
||||
Just d -> d
|
||||
c' <- encryptionSetup c
|
||||
|
||||
-- The rsyncurl is stored in git config, not only in this remote's
|
||||
-- persistant state, so it can vary between hosts.
|
||||
gitConfigSpecialRemote u c' "rsyncurl" url
|
||||
return c'
|
||||
|
||||
rsyncKey :: RsyncOpts -> Key -> String
|
||||
rsyncKey o k = rsyncUrl o </> hashDirMixed k </> f </> f
|
||||
where
|
||||
f = keyFile k
|
||||
|
||||
store :: RsyncOpts -> Key -> Annex Bool
|
||||
store o k = do
|
||||
g <- Annex.gitRepo
|
||||
rsyncSend o k (gitAnnexLocation g k)
|
||||
|
||||
storeEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> Annex Bool
|
||||
storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do
|
||||
g <- Annex.gitRepo
|
||||
let f = gitAnnexLocation g k
|
||||
liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s
|
||||
rsyncSend o enck tmp
|
||||
|
||||
retrieve :: RsyncOpts -> Key -> FilePath -> Annex Bool
|
||||
retrieve o k f = rsyncRemote o
|
||||
-- use inplace when retrieving to support resuming
|
||||
[ Param "--inplace"
|
||||
, Param $ rsyncKey o k
|
||||
, Param f
|
||||
]
|
||||
|
||||
retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> FilePath -> Annex Bool
|
||||
retrieveEncrypted o (cipher, enck) f = withTmp enck $ \tmp -> do
|
||||
res <- retrieve o enck tmp
|
||||
if res
|
||||
then liftIO $ catchBool $ do
|
||||
withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f
|
||||
return True
|
||||
else return res
|
||||
|
||||
remove :: RsyncOpts -> Key -> Annex Bool
|
||||
remove o k = withRsyncScratchDir $ \tmp -> do
|
||||
{- Send an empty directory to rysnc as the parent directory
|
||||
- of the file to remove. -}
|
||||
let dummy = tmp </> keyFile k
|
||||
liftIO $ createDirectoryIfMissing True dummy
|
||||
liftIO $ rsync $ rsyncOptions o ++
|
||||
[ Params "--delete --recursive"
|
||||
, partialParams
|
||||
, Param $ addTrailingPathSeparator dummy
|
||||
, Param $ parentDir $ rsyncKey o k
|
||||
]
|
||||
|
||||
checkPresent :: Git.Repo -> RsyncOpts -> Key -> Annex (Either IOException Bool)
|
||||
checkPresent r o k = do
|
||||
showNote ("checking " ++ Git.repoDescribe r ++ "...")
|
||||
-- note: Does not currently differnetiate between rsync failing
|
||||
-- to connect, and the file not being present.
|
||||
res <- liftIO $ boolSystem "sh" [Param "-c", Param cmd]
|
||||
return $ Right res
|
||||
where
|
||||
cmd = "rsync --quiet " ++ testfile ++ " 2>/dev/null"
|
||||
testfile = shellEscape $ rsyncKey o k
|
||||
|
||||
{- Rsync params to enable resumes of sending files safely,
|
||||
- ensure that files are only moved into place once complete
|
||||
-}
|
||||
partialParams :: CommandParam
|
||||
partialParams = Params "--no-inplace --partial --partial-dir=.rsync-partial"
|
||||
|
||||
{- Runs an action in an empty scratch directory that can be used to build
|
||||
- up trees for rsync. -}
|
||||
withRsyncScratchDir :: (FilePath -> Annex Bool) -> Annex Bool
|
||||
withRsyncScratchDir a = do
|
||||
g <- Annex.gitRepo
|
||||
pid <- liftIO $ getProcessID
|
||||
let tmp = gitAnnexTmpDir g </> "rsynctmp" </> show pid
|
||||
nuke tmp
|
||||
liftIO $ createDirectoryIfMissing True $ tmp
|
||||
res <- a tmp
|
||||
nuke tmp
|
||||
return res
|
||||
where
|
||||
nuke d = liftIO $ do
|
||||
e <- doesDirectoryExist d
|
||||
when e $ liftIO $ removeDirectoryRecursive d
|
||||
|
||||
rsyncRemote :: RsyncOpts -> [CommandParam] -> Annex Bool
|
||||
rsyncRemote o params = do
|
||||
showProgress -- make way for progress bar
|
||||
res <- liftIO $ rsync $ rsyncOptions o ++ defaultParams ++ params
|
||||
if res
|
||||
then return res
|
||||
else do
|
||||
showLongNote "rsync failed -- run git annex again to resume file transfer"
|
||||
return res
|
||||
where
|
||||
defaultParams = [Params "--progress"]
|
||||
|
||||
{- To send a single key is slightly tricky; need to build up a temporary
|
||||
directory structure to pass to rsync so it can create the hash
|
||||
directories. -}
|
||||
rsyncSend :: RsyncOpts -> Key -> FilePath -> Annex Bool
|
||||
rsyncSend o k src = withRsyncScratchDir $ \tmp -> do
|
||||
let dest = tmp </> hashDirMixed k </> f </> f
|
||||
liftIO $ createDirectoryIfMissing True $ parentDir $ dest
|
||||
liftIO $ createLink src dest
|
||||
res <- rsyncRemote o
|
||||
[ Param "--recursive"
|
||||
, partialParams
|
||||
-- tmp/ to send contents of tmp dir
|
||||
, Param $ addTrailingPathSeparator tmp
|
||||
, Param $ rsyncUrl o
|
||||
]
|
||||
return res
|
||||
where
|
||||
f = keyFile k
|
|
@ -19,7 +19,6 @@ import Control.Monad (when)
|
|||
import Control.Monad.State (liftIO)
|
||||
import System.Environment
|
||||
import System.Posix.Files
|
||||
import System.Directory
|
||||
|
||||
import RemoteClass
|
||||
import Types
|
||||
|
@ -33,7 +32,7 @@ import Remote.Special
|
|||
import Remote.Encryptable
|
||||
import Crypto
|
||||
import Key
|
||||
import Utility
|
||||
import Content
|
||||
|
||||
remote :: RemoteType Annex
|
||||
remote = RemoteType {
|
||||
|
@ -108,17 +107,14 @@ store r k = s3Action r False $ \(conn, bucket) -> do
|
|||
s3Bool res
|
||||
|
||||
storeEncrypted :: Remote Annex -> (Cipher, Key) -> Key -> Annex Bool
|
||||
storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) -> do
|
||||
g <- Annex.gitRepo
|
||||
let f = gitAnnexLocation g k
|
||||
storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) ->
|
||||
-- To get file size of the encrypted content, have to use a temp file.
|
||||
-- (An alternative would be chunking to to a constant size.)
|
||||
let tmp = gitAnnexTmpLocation g enck
|
||||
liftIO $ createDirectoryIfMissing True (parentDir tmp)
|
||||
withTmp enck $ \tmp -> do
|
||||
g <- Annex.gitRepo
|
||||
let f = gitAnnexLocation g k
|
||||
liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s
|
||||
res <- liftIO $ storeHelper (conn, bucket) r enck tmp
|
||||
tmp_exists <- liftIO $ doesFileExist tmp
|
||||
when tmp_exists $ liftIO $ removeFile tmp
|
||||
s3Bool res
|
||||
|
||||
storeHelper :: (AWSConnection, String) -> Remote Annex -> Key -> FilePath -> IO (AWSResult ())
|
||||
|
|
|
@ -95,8 +95,9 @@ boolSystem command params = do
|
|||
restoresignals oldint oldset
|
||||
executeFile command True (toCommand params) Nothing
|
||||
|
||||
{- Escapes a filename to be safely able to be exposed to the shell. -}
|
||||
shellEscape :: FilePath -> String
|
||||
{- Escapes a filename or other parameter to be safely able to be exposed to
|
||||
- the shell. -}
|
||||
shellEscape :: String -> String
|
||||
shellEscape f = "'" ++ escaped ++ "'"
|
||||
where
|
||||
-- replace ' with '"'"'
|
||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -2,6 +2,8 @@ git-annex (0.20110426) UNRELEASED; urgency=low
|
|||
|
||||
* Switch back to haskell SHA library, so git-annex remains buildable on
|
||||
Debian stable.
|
||||
* Added rsync special remotes. This could be used, for example, to
|
||||
store annexed content on rsync.net, encrypted naturally. Or anywhere else.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Tue, 26 Apr 2011 11:23:54 -0400
|
||||
|
||||
|
|
|
@ -1,5 +1,21 @@
|
|||
i think it would be useful to have a fourth kind of [[special remote]]s that connects to a dumb storage using sftp or rsync. this can be emulated by using sshfs, but that means lots of round-trips through the system and is limited to platforms where sshfs is available.
|
||||
i think it would be useful to have a fourth kind of [[special remote]]s
|
||||
that connects to a dumb storage using sftp or rsync. this can be emulated
|
||||
by using sshfs, but that means lots of round-trips through the system and
|
||||
is limited to platforms where sshfs is available.
|
||||
|
||||
typical use cases are backups to storate shared between a group of people where each user only has limited access (sftp or rsync), when using [[bup]] is not an option.
|
||||
typical use cases are backups to storate shared between a group of people
|
||||
where each user only has limited access (sftp or rsync), when using [[bup]]
|
||||
is not an option.
|
||||
|
||||
an alternative to implementing yet another special remote would be to have some kind of plugin system by which external programs can provide an interface to key-value stores (i'd implement the sftp backend myself, but haven't learned haskell yet).
|
||||
an alternative to implementing yet another special remote would be to have
|
||||
some kind of plugin system by which external programs can provide an
|
||||
interface to key-value stores (i'd implement the sftp backend myself, but
|
||||
haven't learned haskell yet).
|
||||
|
||||
> Ask and ye [[shall receive|special_remotes/rsync]].
|
||||
>
|
||||
> Sometimes I almost think that a generic configurable special remote that
|
||||
> just uses configured shell commands would be useful.. But there's really
|
||||
> no comparison with sitting down and writing code tuned to work with
|
||||
> a given transport like rsync, when it comes to reliability and taking
|
||||
> advantage of its abilities (like resuming). --[[Joey]]
|
||||
|
|
|
@ -9,6 +9,7 @@ They cannot be used by other git commands though.
|
|||
* [[Amazon_S3]]
|
||||
* [[bup]]
|
||||
* [[directory]]
|
||||
* [[rsync]]
|
||||
|
||||
## Unused content on special remotes
|
||||
|
||||
|
|
|
@ -7,4 +7,4 @@ the drive's mountpoint as a directory remote.
|
|||
|
||||
Setup example:
|
||||
|
||||
# git annex initremote usbdrive directory=/media/usbdrive/ encryption=none
|
||||
# git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none
|
||||
|
|
23
doc/special_remotes/rsync.mdwn
Normal file
23
doc/special_remotes/rsync.mdwn
Normal file
|
@ -0,0 +1,23 @@
|
|||
This special remote type rsyncs file contents to somewhere else.
|
||||
|
||||
Setup example:
|
||||
|
||||
# git annex initremote myrsync type=rsync rsyncurl=rsync://rsync.example.com/myrsync encryption=joey@kitenet.net
|
||||
|
||||
## configuration
|
||||
|
||||
These parameters can be passed to `git annex initremote` to configure rsync:
|
||||
|
||||
* `encryption` - Required. Either "none" to disable encryption of content
|
||||
stored in rsync,
|
||||
or a value that can be looked up (using gpg -k) to find a gpg encryption
|
||||
key that will be given access to the remote. Note that additional gpg
|
||||
keys can be given access to a remote by rerunning initremote with
|
||||
the new key id. See [[encryption]].
|
||||
|
||||
* `rsyncurl` - Required. This is the url or `hostname:/directory` to
|
||||
pass to rsync to tell it where to store content.
|
||||
|
||||
The `annex-rsync-options` git configuration setting can be used to pass
|
||||
parameters to rsync. Note that it is **not safe** to put "--delete"
|
||||
in `annex-rsync-options` when using rsync special remotes.
|
Loading…
Add table
Reference in a new issue