diff --git a/Command/Sync.hs b/Command/Sync.hs index 9a24175682..46a03a4de8 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -52,9 +52,10 @@ import Control.Concurrent.MVar import qualified Data.Map as M cmd :: Command -cmd = command "sync" SectionCommon - "synchronize local repository with remotes" - (paramRepeating paramRemote) (seek <$$> optParser) +cmd = withGlobalOptions [jobsOption] $ + command "sync" SectionCommon + "synchronize local repository with remotes" + (paramRepeating paramRemote) (seek <$$> optParser) data SyncOptions = SyncOptions { syncWith :: CmdParams @@ -102,7 +103,8 @@ seek o = do -- Syncing involves many actions, any of which can independently -- fail, without preventing the others from running. - seekActions $ return $ concat + -- These actions cannot be run concurrently. + mapM_ includeCommandAction $ concat [ [ commit o ] , [ withbranch mergeLocal ] , map (withbranch . pullRemote) gitremotes @@ -115,14 +117,14 @@ seek o = do -- branch on the remotes in the meantime, so pull -- and merge again to avoid our push overwriting -- those changes. - seekActions $ return $ concat + mapM_ includeCommandAction $ concat [ map (withbranch . pullRemote) gitremotes , [ commitAnnex, mergeAnnex ] ] - seekActions $ return $ concat - [ [ withbranch pushLocal ] - , map (withbranch . pushRemote) gitremotes - ] + + void $ includeCommandAction $ withbranch pushLocal + -- Pushes to remotes can run concurrently. + mapM_ (commandAction . withbranch . pushRemote) gitremotes {- Merging may delete the current directory, so go to the top - of the repo. This also means that sync always acts on all files in the @@ -380,7 +382,9 @@ newer remote b = do - This ensures that preferred content expressions that match on - filenames work, even when in --all mode. - - - If any file movements were generated, returns true. + - Returns true if any file transfers were made. + - + - When concurrency is enabled, files are processed concurrently. 
-} seekSyncContent :: SyncOptions -> [Remote] -> Annex Bool seekSyncContent o rs = do @@ -392,15 +396,17 @@ seekSyncContent o rs = do (seekkeys mvar bloom) (const noop) [] + finishCommandActions liftIO $ not <$> isEmptyMVar mvar where seekworktree mvar l bloomfeeder = seekHelper LsFiles.inRepo l >>= mapM_ (\f -> ifAnnexed f (go (Right bloomfeeder) mvar (Just f)) noop) seekkeys mvar bloom getkeys = mapM_ (go (Left bloom) mvar Nothing) =<< getkeys - go ebloom mvar af k = do - void $ liftIO $ tryPutMVar mvar () - syncFile ebloom rs af k + go ebloom mvar af k = commandAction $ do + whenM (syncFile ebloom rs af k) $ + void $ liftIO $ tryPutMVar mvar () + return Nothing {- If it's preferred content, and we don't have it, get it from one of the - listed remotes (preferring the cheaper earlier ones). @@ -412,8 +418,10 @@ seekSyncContent o rs = do - - Drop it from each remote that has it, where it's not preferred content - (honoring numcopies). + - + - Returns True if any file transfers were made. -} -syncFile :: Either (Maybe (Bloom Key)) (Key -> Annex ()) -> [Remote] -> AssociatedFile -> Key -> Annex () +syncFile :: Either (Maybe (Bloom Key)) (Key -> Annex ()) -> [Remote] -> AssociatedFile -> Key -> Annex Bool syncFile ebloom rs af k = do locs <- loggedLocations k let (have, lack) = partition (\r -> Remote.uuid r `elem` locs) rs @@ -443,6 +451,8 @@ syncFile ebloom rs af k = do -- the sync failed. handleDropsFrom locs' rs "unwanted" True k af Nothing callCommandAction + + return (got || not (null putrs)) where wantget have = allM id [ pure (not $ null have) diff --git a/debian/changelog b/debian/changelog index 06c6a8c38b..d545480476 100644 --- a/debian/changelog +++ b/debian/changelog @@ -5,6 +5,9 @@ git-annex (5.20150813) UNRELEASED; urgency=medium non-data-transfer overhead 6x. * --debug is passed along to git-annex-shell when git-annex is in debug mode. * Added WHEREIS to external special remote protocol. 
+ * sync: Support --jobs + * sync --content: Avoid unnecessary second pull from remotes when + no file transfers are made. -- Joey Hess Wed, 12 Aug 2015 14:31:01 -0400 diff --git a/doc/git-annex-sync.mdwn b/doc/git-annex-sync.mdwn index 2f71805469..b4c23f8439 100644 --- a/doc/git-annex-sync.mdwn +++ b/doc/git-annex-sync.mdwn @@ -65,6 +65,16 @@ by running "git annex sync" on the remote. will only match the version of files currently in the work tree, but not past versions of files. +* `--jobs=N` `-JN` + + Enables parallel syncing with up to the specified number of jobs + running at once. For example: `-J10` + + When there are multiple git remotes, pushes will be made to them in + parallel. Pulls are not done in parallel because that tends to be + less efficient. When --content is synced, the files are processed + in parallel as well. + # SEE ALSO [[git-annex]](1) diff --git a/doc/todo/Support_--jobs_option_for___39__sync_--content__39__.mdwn b/doc/todo/Support_--jobs_option_for___39__sync_--content__39__.mdwn index cabff5ffc0..9923dcff60 100644 --- a/doc/todo/Support_--jobs_option_for___39__sync_--content__39__.mdwn +++ b/doc/todo/Support_--jobs_option_for___39__sync_--content__39__.mdwn @@ -1 +1,14 @@ -As the subject says. I mostly use `git annex sync --content` to transfer files between repositories, as its easier than running `git annex sync`, a bunch of `git annex copy`s and then a `git annex get` to make sure I have all the files I should have. It would be good if the shortcut could also work in parallel. +As the subject says. I mostly use `git annex sync --content` to transfer +files between repositories, as its easier than running `git annex sync`, a +bunch of `git annex copy`s and then a `git annex get` to make sure I have +all the files I should have. It would be good if the shortcut could also +work in parallel. + +> It also can be faster to push concurrent. 
OTOH, concurrent pulls +> can lead to the same git objects being downloaded redundantly, so best to +> avoid those I think. +> +> I've implemented this. It suffers from the same +> lack of support for displaying progress when running it in parallel as +> documented on [[parallel_get]]. Other than that wart, this is [[done]]. +> --[[Joey]]