Support exporttree=yes for rsync special remotes.

Renaming is not supported; it might be possible to use --fuzzy to get rsync
to notice the file is being renamed, but that is a bit ... fuzzy.

On the other hand, interrupted transfers of an exported file are resumed,
since rsync is great at that. Had to adjust the exporttree docs, which
claimed that interrupted transfers would always restart from the beginning.
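
For reference, the resuming falls out of rsync flags this remote already
uses: retrieves pass --inplace, and sends go through partialParams, whose
comment describes it as the params that enable safe resumes of sending.
A rough sketch of the equivalent commands, with made-up paths:

    # download: --inplace extends the partial local file instead of restarting
    rsync --inplace user@host:dir/subdir/file /tmp/dest/file

    # upload: keeping partial files on the remote lets a rerun pick up from them
    rsync --partial /tmp/src/file user@host:dir/subdir/file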

Note that remove no longer creates an empty dummy directory, and instead
sends the top-level empty scratch directory. This works just as well, and I
noticed the dummy was unnecessary when refactoring the code into
removeGeneric. Verified that the behavior of remove is unchanged, and that
git annex testremote passes.
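
To spell out the trick removeGeneric uses: an empty scratch directory is
rsynced to the remote with --delete, and include rules limit deletion to
the target file and its parent directories, while --exclude='*' protects
everything else. A rough sketch with illustrative paths (the real include
list comes from dirHashes, or from the export location):

    # delete only XX/YY/file on the remote by syncing an empty directory over it
    rsync --include=XX --include=XX/YY --include=XX/YY/file --exclude='*' \
        --quiet --delete --recursive empty/ user@host:dir/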

This commit was sponsored by Brock Spratlen on Patreon.
Joey Hess, 2018-02-28 12:09:03 -04:00
parent 218c679af6
commit bed6773346
GPG key ID: DB12DB0FF05F8F38
6 changed files with 119 additions and 39 deletions

@@ -1,3 +1,9 @@
+git-annex (6.20180228) UNRELEASED; urgency=medium
+
+  * Support exporttree=yes for rsync special remotes.
+
+ -- Joey Hess <id@joeyh.name>  Wed, 28 Feb 2018 11:53:03 -0400
+
 git-annex (6.20180227) upstream; urgency=medium
 
   * inprogress: Avoid showing failures for files not in progress.

@@ -1,6 +1,6 @@
 {- A remote that is only accessible by rsync.
  -
- - Copyright 2011 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2018 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU GPL version 3 or higher.
  -}
@@ -29,6 +29,7 @@ import Annex.Ssh
 import Remote.Helper.Special
 import Remote.Helper.Messages
 import Remote.Helper.Export
+import Types.Export
 import Remote.Rsync.RsyncUrl
 import Crypto
 import Utility.Rsync
@@ -49,7 +50,7 @@ remote = RemoteType
 	, enumerate = const (findSpecialRemotes "rsyncurl")
 	, generate = gen
 	, setup = rsyncSetup
-	, exportSupported = exportUnsupported
+	, exportSupported = exportIsSupported
 	}
 
 gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
@@ -75,7 +76,14 @@ gen r u c gc = do
 		, lockContent = Nothing
 		, checkPresent = checkPresentDummy
 		, checkPresentCheap = False
-		, exportActions = exportUnsupported
+		, exportActions = return $ ExportActions
+			{ storeExport = storeExportM o
+			, retrieveExport = retrieveExportM o
+			, removeExport = removeExportM o
+			, checkPresentExport = checkPresentExportM o
+			, removeExportDirectory = Just (removeExportDirectoryM o)
+			, renameExport = renameExportM o
+			}
 		, whereisKey = Nothing
 		, remoteFsck = Nothing
 		, repairRepo = Nothing
@@ -165,14 +173,25 @@ rsyncSetup _ mu _ c gc = do
  - pass --include=X --include=X/Y --include=X/Y/file --exclude=*)
  -}
 store :: RsyncOpts -> Key -> FilePath -> MeterUpdate -> Annex Bool
-store o k src meterupdate = withRsyncScratchDir $ \tmp -> do
-	let dest = tmp </> Prelude.head (keyPaths k)
-	liftIO $ createDirectoryIfMissing True $ parentDir dest
-	ok <- liftIO $ if canrename
+store o k src meterupdate = storeGeneric o meterupdate basedest populatedest
+  where
+	basedest = Prelude.head (keyPaths k)
+	populatedest dest = liftIO $ if canrename
 		then do
 			rename src dest
 			return True
 		else createLinkOrCopy src dest
+	{- If the key being sent is encrypted or chunked, the file
+	 - containing its content is a temp file, and so can be
+	 - renamed into place. Otherwise, the file is the annexed
+	 - object file, and has to be copied or hard linked into place. -}
+	canrename = isEncKey k || isChunkKey k
+
+storeGeneric :: RsyncOpts -> MeterUpdate -> FilePath -> (FilePath -> Annex Bool) -> Annex Bool
+storeGeneric o meterupdate basedest populatedest = withRsyncScratchDir $ \tmp -> do
+	let dest = tmp </> basedest
+	liftIO $ createDirectoryIfMissing True $ parentDir dest
+	ok <- populatedest dest
 	ps <- sendParams
 	if ok
 		then showResumable $ rsyncRemote Upload o (Just meterupdate) $ ps ++
@@ -182,41 +201,18 @@ store o k src meterupdate = withRsyncScratchDir $ \tmp -> do
 			, Param $ rsyncUrl o
 			]
 		else return False
-  where
-	{- If the key being sent is encrypted or chunked, the file
-	 - containing its content is a temp file, and so can be
-	 - renamed into place. Otherwise, the file is the annexed
-	 - object file, and has to be copied or hard linked into place. -}
-	canrename = isEncKey k || isChunkKey k
 
 retrieve :: RsyncOpts -> FilePath -> Key -> MeterUpdate -> Annex ()
 retrieve o f k p =
-	unlessM (rsyncRetrieve o k f (Just p)) $
+	unlessM (rsyncRetrieveKey o k f (Just p)) $
 		giveup "rsync failed"
 
 retrieveCheap :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> Annex Bool
-retrieveCheap o k _af f = ifM (preseedTmp k f) ( rsyncRetrieve o k f Nothing , return False )
+retrieveCheap o k _af f = ifM (preseedTmp k f) ( rsyncRetrieveKey o k f Nothing , return False )
 
 remove :: RsyncOpts -> Remover
-remove o k = do
-	ps <- sendParams
-	withRsyncScratchDir $ \tmp -> liftIO $ do
-		{- Send an empty directory to rysnc to make it delete. -}
-		let dummy = tmp </> keyFile k
-		createDirectoryIfMissing True dummy
-		rsync $ rsyncOptions o ++ ps ++
-			map (\s -> Param $ "--include=" ++ s) includes ++
-			[ Param "--exclude=*" -- exclude everything else
-			, Param "--quiet", Param "--delete", Param "--recursive"
-			] ++ partialParams ++
-			[ Param $ addTrailingPathSeparator dummy
-			, Param $ rsyncUrl o
-			]
+remove o k = removeGeneric o includes
   where
-	{- Specify include rules to match the directories where the
-	 - content could be. Note that the parent directories have
-	 - to also be explicitly included, due to how rsync
-	 - traverses directories. -}
 	includes = concatMap use dirHashes
 	use h = let dir = h def k in
 		[ parentDir dir
@@ -225,18 +221,77 @@ remove o k = do
 		, dir </> keyFile k </> "***"
 		]
 
+{- An empty directory is rsynced to make it delete. Everything is excluded,
+ - except for the specified includes. Due to the way rsync traverses
+ - directories, the includes must match both the file to be deleted, and
+ - its parent directories, but not their other contents. -}
+removeGeneric :: RsyncOpts -> [String] -> Annex Bool
+removeGeneric o includes = do
+	ps <- sendParams
+	withRsyncScratchDir $ \tmp -> liftIO $ do
+		{- Send an empty directory to rysnc to make it delete. -}
+		rsync $ rsyncOptions o ++ ps ++
+			map (\s -> Param $ "--include=" ++ s) includes ++
+			[ Param "--exclude=*" -- exclude everything else
+			, Param "--quiet", Param "--delete", Param "--recursive"
+			] ++ partialParams ++
+			[ Param $ addTrailingPathSeparator tmp
+			, Param $ rsyncUrl o
+			]
+
 checkKey :: Git.Repo -> RsyncOpts -> CheckPresent
 checkKey r o k = do
 	showChecking r
+	checkPresentGeneric o (rsyncUrls o k)
+
+checkPresentGeneric :: RsyncOpts -> [RsyncUrl] -> Annex Bool
+checkPresentGeneric o rsyncurls =
 	-- note: Does not currently differentiate between rsync failing
 	-- to connect, and the file not being present.
-	untilTrue (rsyncUrls o k) $ \u ->
+	untilTrue rsyncurls $ \u ->
 		liftIO $ catchBoolIO $ do
 			withQuietOutput createProcessSuccess $
 				proc "rsync" $ toCommand $
 					rsyncOptions o ++ [Param u]
 			return True
 
+storeExportM :: RsyncOpts -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool
+storeExportM o src _k loc meterupdate =
+	storeGeneric o meterupdate basedest populatedest
+  where
+	basedest = fromExportLocation loc
+	populatedest = liftIO . createLinkOrCopy src
+
+retrieveExportM :: RsyncOpts -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Bool
+retrieveExportM o _k loc dest p = rsyncRetrieve o [rsyncurl] dest (Just p)
+  where
+	rsyncurl = mkRsyncUrl o (fromExportLocation loc)
+
+checkPresentExportM :: RsyncOpts -> Key -> ExportLocation -> Annex Bool
+checkPresentExportM o _k loc = checkPresentGeneric o [rsyncurl]
+  where
+	rsyncurl = mkRsyncUrl o (fromExportLocation loc)
+
+removeExportM :: RsyncOpts -> Key -> ExportLocation -> Annex Bool
+removeExportM o _k loc =
+	removeGeneric o (includes (fromExportLocation loc))
+  where
+	includes f = f : case upFrom f of
+		Nothing -> []
+		Just f' -> includes f'
+
+removeExportDirectoryM :: RsyncOpts -> ExportDirectory -> Annex Bool
+removeExportDirectoryM o ed = removeGeneric o (allbelow d : includes d)
+  where
+	d = fromExportDirectory ed
+	allbelow f = f </> "***"
+	includes f = f : case upFrom f of
+		Nothing -> []
+		Just f' -> includes f'
+
+renameExportM :: RsyncOpts -> Key -> ExportLocation -> ExportLocation -> Annex Bool
+renameExportM _ _ _ _ = return False
+
 {- Rsync params to enable resumes of sending files safely,
  - ensure that files are only moved into place once complete
  -}
@@ -259,15 +314,18 @@ withRsyncScratchDir a = do
 	t <- fromRepo gitAnnexTmpObjectDir
 	withTmpDirIn t "rsynctmp" a
 
-rsyncRetrieve :: RsyncOpts -> Key -> FilePath -> Maybe MeterUpdate -> Annex Bool
-rsyncRetrieve o k dest meterupdate =
-	showResumable $ untilTrue (rsyncUrls o k) $ \u -> rsyncRemote Download o meterupdate
+rsyncRetrieve :: RsyncOpts -> [RsyncUrl] -> FilePath -> Maybe MeterUpdate -> Annex Bool
+rsyncRetrieve o rsyncurls dest meterupdate =
+	showResumable $ untilTrue rsyncurls $ \u -> rsyncRemote Download o meterupdate
 		-- use inplace when retrieving to support resuming
 		[ Param "--inplace"
 		, Param u
 		, File dest
 		]
 
+rsyncRetrieveKey :: RsyncOpts -> Key -> FilePath -> Maybe MeterUpdate -> Annex Bool
+rsyncRetrieveKey o k dest meterupdate = rsyncRetrieve o (rsyncUrls o k) dest meterupdate
+
 showResumable :: Annex Bool -> Annex Bool
 showResumable a = ifM a
 	( return True

@@ -1,6 +1,6 @@
 {- Rsync urls.
  -
- - Copyright 2014 Joey Hess <id@joeyh.name>
+ - Copyright 2014-2018 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU GPL version 3 or higher.
  -}
@@ -36,6 +36,9 @@ rsyncEscape o u
 	| rsyncShellEscape o && rsyncUrlIsShell (rsyncUrl o) = shellEscape u
 	| otherwise = u
 
+mkRsyncUrl :: RsyncOpts -> FilePath -> RsyncUrl
+mkRsyncUrl o f = rsyncUrl o </> rsyncEscape o f
+
 rsyncUrls :: RsyncOpts -> Key -> [RsyncUrl]
 rsyncUrls o k = map use dirHashes
   where

@@ -29,7 +29,7 @@ Repeated exports are done efficiently, by diffing the old and new tree,
 and transferring only the changed files, and renaming files as necessary.
 
 Exports can be interrupted and resumed. However, partially uploaded files
-will be re-started from the beginning.
+will be re-started from the beginning in most cases.
 
 Once content has been exported to a remote, commands like `git annex get`
 can download content from there the same as from other remotes. However,

@@ -22,6 +22,10 @@ These parameters can be passed to `git annex initremote` to configure rsync:
 
 * `keyid` - Specifies the gpg key to use for [[encryption]].
 
+* `exporttree` - Set to "yes" to make this special remote usable
+  by [[git-annex-export]]. It will not be usable as a general-purpose
+  special remote.
+
 * `shellescape` - Optional. Set to "no" to avoid shell escaping normally
   done when using rsync over ssh. That escaping is needed with typical
   setups, but not with some hosting providers that do not expose rsynced

@@ -0,0 +1,9 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 8"""
+ date="2018-02-28T15:54:06Z"
+ content="""
+Remotes need to have a nontrivial amount of code added to them in order to
+support export. That had not been done for rsync yet. I've implemented it
+now.
+"""]]