sync: Support --jobs

* sync: Support --jobs
* sync --content: Avoid unnecessary second pull from remotes when
  no file transfers are made.
This commit is contained in:
Joey Hess 2015-08-14 13:49:55 -04:00
parent 4a5fe93248
commit 87b4229b23
4 changed files with 51 additions and 15 deletions

View file

@ -52,9 +52,10 @@ import Control.Concurrent.MVar
import qualified Data.Map as M import qualified Data.Map as M
cmd :: Command cmd :: Command
cmd = command "sync" SectionCommon cmd = withGlobalOptions [jobsOption] $
"synchronize local repository with remotes" command "sync" SectionCommon
(paramRepeating paramRemote) (seek <$$> optParser) "synchronize local repository with remotes"
(paramRepeating paramRemote) (seek <$$> optParser)
data SyncOptions = SyncOptions data SyncOptions = SyncOptions
{ syncWith :: CmdParams { syncWith :: CmdParams
@ -102,7 +103,8 @@ seek o = do
-- Syncing involves many actions, any of which can independently -- Syncing involves many actions, any of which can independently
-- fail, without preventing the others from running. -- fail, without preventing the others from running.
seekActions $ return $ concat -- These actions cannot be run concurrently.
mapM_ includeCommandAction $ concat
[ [ commit o ] [ [ commit o ]
, [ withbranch mergeLocal ] , [ withbranch mergeLocal ]
, map (withbranch . pullRemote) gitremotes , map (withbranch . pullRemote) gitremotes
@ -115,14 +117,14 @@ seek o = do
-- branch on the remotes in the meantime, so pull -- branch on the remotes in the meantime, so pull
-- and merge again to avoid our push overwriting -- and merge again to avoid our push overwriting
-- those changes. -- those changes.
seekActions $ return $ concat mapM_ includeCommandAction $ concat
[ map (withbranch . pullRemote) gitremotes [ map (withbranch . pullRemote) gitremotes
, [ commitAnnex, mergeAnnex ] , [ commitAnnex, mergeAnnex ]
] ]
seekActions $ return $ concat
[ [ withbranch pushLocal ] void $ includeCommandAction $ withbranch pushLocal
, map (withbranch . pushRemote) gitremotes -- Pushes to remotes can run concurrently.
] mapM_ (commandAction . withbranch . pushRemote) gitremotes
{- Merging may delete the current directory, so go to the top {- Merging may delete the current directory, so go to the top
- of the repo. This also means that sync always acts on all files in the - of the repo. This also means that sync always acts on all files in the
@ -380,7 +382,9 @@ newer remote b = do
- This ensures that preferred content expressions that match on - This ensures that preferred content expressions that match on
- filenames work, even when in --all mode. - filenames work, even when in --all mode.
- -
- If any file movements were generated, returns true. - Returns true if any file transfers were made.
-
- When concurrency is enabled, files are processed concurrently.
-} -}
seekSyncContent :: SyncOptions -> [Remote] -> Annex Bool seekSyncContent :: SyncOptions -> [Remote] -> Annex Bool
seekSyncContent o rs = do seekSyncContent o rs = do
@ -392,15 +396,17 @@ seekSyncContent o rs = do
(seekkeys mvar bloom) (seekkeys mvar bloom)
(const noop) (const noop)
[] []
finishCommandActions
liftIO $ not <$> isEmptyMVar mvar liftIO $ not <$> isEmptyMVar mvar
where where
seekworktree mvar l bloomfeeder = seekHelper LsFiles.inRepo l >>= seekworktree mvar l bloomfeeder = seekHelper LsFiles.inRepo l >>=
mapM_ (\f -> ifAnnexed f (go (Right bloomfeeder) mvar (Just f)) noop) mapM_ (\f -> ifAnnexed f (go (Right bloomfeeder) mvar (Just f)) noop)
seekkeys mvar bloom getkeys = seekkeys mvar bloom getkeys =
mapM_ (go (Left bloom) mvar Nothing) =<< getkeys mapM_ (go (Left bloom) mvar Nothing) =<< getkeys
go ebloom mvar af k = do go ebloom mvar af k = commandAction $ do
void $ liftIO $ tryPutMVar mvar () whenM (syncFile ebloom rs af k) $
syncFile ebloom rs af k void $ liftIO $ tryPutMVar mvar ()
return Nothing
{- If it's preferred content, and we don't have it, get it from one of the {- If it's preferred content, and we don't have it, get it from one of the
- listed remotes (preferring the cheaper earlier ones). - listed remotes (preferring the cheaper earlier ones).
@ -412,8 +418,10 @@ seekSyncContent o rs = do
- -
- Drop it from each remote that has it, where it's not preferred content - Drop it from each remote that has it, where it's not preferred content
- (honoring numcopies). - (honoring numcopies).
-
- Returns True if any file transfers were made.
-} -}
syncFile :: Either (Maybe (Bloom Key)) (Key -> Annex ()) -> [Remote] -> AssociatedFile -> Key -> Annex () syncFile :: Either (Maybe (Bloom Key)) (Key -> Annex ()) -> [Remote] -> AssociatedFile -> Key -> Annex Bool
syncFile ebloom rs af k = do syncFile ebloom rs af k = do
locs <- loggedLocations k locs <- loggedLocations k
let (have, lack) = partition (\r -> Remote.uuid r `elem` locs) rs let (have, lack) = partition (\r -> Remote.uuid r `elem` locs) rs
@ -443,6 +451,8 @@ syncFile ebloom rs af k = do
-- the sync failed. -- the sync failed.
handleDropsFrom locs' rs "unwanted" True k af handleDropsFrom locs' rs "unwanted" True k af
Nothing callCommandAction Nothing callCommandAction
return (got || not (null putrs))
where where
wantget have = allM id wantget have = allM id
[ pure (not $ null have) [ pure (not $ null have)

3
debian/changelog vendored
View file

@ -5,6 +5,9 @@ git-annex (5.20150813) UNRELEASED; urgency=medium
non-data-transfer overhead 6x. non-data-transfer overhead 6x.
* --debug is passed along to git-annex-shell when git-annex is in debug mode. * --debug is passed along to git-annex-shell when git-annex is in debug mode.
* Added WHEREIS to external special remote protocol. * Added WHEREIS to external special remote protocol.
* sync: Support --jobs
* sync --content: Avoid unnecessary second pull from remotes when
no file transfers are made.
-- Joey Hess <id@joeyh.name> Wed, 12 Aug 2015 14:31:01 -0400 -- Joey Hess <id@joeyh.name> Wed, 12 Aug 2015 14:31:01 -0400

View file

@ -65,6 +65,16 @@ by running "git annex sync" on the remote.
will only match the version of files currently in the work tree, but not will only match the version of files currently in the work tree, but not
past versions of files. past versions of files.
* `--jobs=N` `-JN`
Enables parallel syncing with up to the specified number of jobs
running at once. For example: `-J10`
When there are multiple git remotes, pushes will be made to them in
parallel. Pulls are not done in parallel because that tends to be
less efficient. When --content is synced, the files are processed
in parallel as well.
# SEE ALSO # SEE ALSO
[[git-annex]](1) [[git-annex]](1)

View file

@ -1 +1,14 @@
As the subject says. I mostly use `git annex sync --content` to transfer files between repositories, as it's easier than running `git annex sync`, a bunch of `git annex copy`s and then a `git annex get` to make sure I have all the files I should have. It would be good if the shortcut could also work in parallel. As the subject says. I mostly use `git annex sync --content` to transfer
files between repositories, as it's easier than running `git annex sync`, a
bunch of `git annex copy`s and then a `git annex get` to make sure I have
all the files I should have. It would be good if the shortcut could also
work in parallel.
> It also can be faster to push concurrently. OTOH, concurrent pulls
> can lead to the same git objects being downloaded redundantly, so best to
> avoid those I think.
>
> I've implemented this. It suffers from the same
> lack of support for displaying progress when running it in parallel as
> documented on [[parallel_get]]. Other than that wart, this is [[done]].
> --[[Joey]]