sync: Support --jobs
* sync: Support --jobs * sync --content: Avoid unnecessary second pull from remotes when no file transfers are made.
This commit is contained in:
parent
4a5fe93248
commit
87b4229b23
4 changed files with 51 additions and 15 deletions
|
@ -52,9 +52,10 @@ import Control.Concurrent.MVar
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
|
||||||
cmd :: Command
|
cmd :: Command
|
||||||
cmd = command "sync" SectionCommon
|
cmd = withGlobalOptions [jobsOption] $
|
||||||
"synchronize local repository with remotes"
|
command "sync" SectionCommon
|
||||||
(paramRepeating paramRemote) (seek <$$> optParser)
|
"synchronize local repository with remotes"
|
||||||
|
(paramRepeating paramRemote) (seek <$$> optParser)
|
||||||
|
|
||||||
data SyncOptions = SyncOptions
|
data SyncOptions = SyncOptions
|
||||||
{ syncWith :: CmdParams
|
{ syncWith :: CmdParams
|
||||||
|
@ -102,7 +103,8 @@ seek o = do
|
||||||
|
|
||||||
-- Syncing involves many actions, any of which can independently
|
-- Syncing involves many actions, any of which can independently
|
||||||
-- fail, without preventing the others from running.
|
-- fail, without preventing the others from running.
|
||||||
seekActions $ return $ concat
|
-- These actions cannot be run concurrently.
|
||||||
|
mapM_ includeCommandAction $ concat
|
||||||
[ [ commit o ]
|
[ [ commit o ]
|
||||||
, [ withbranch mergeLocal ]
|
, [ withbranch mergeLocal ]
|
||||||
, map (withbranch . pullRemote) gitremotes
|
, map (withbranch . pullRemote) gitremotes
|
||||||
|
@ -115,14 +117,14 @@ seek o = do
|
||||||
-- branch on the remotes in the meantime, so pull
|
-- branch on the remotes in the meantime, so pull
|
||||||
-- and merge again to avoid our push overwriting
|
-- and merge again to avoid our push overwriting
|
||||||
-- those changes.
|
-- those changes.
|
||||||
seekActions $ return $ concat
|
mapM_ includeCommandAction $ concat
|
||||||
[ map (withbranch . pullRemote) gitremotes
|
[ map (withbranch . pullRemote) gitremotes
|
||||||
, [ commitAnnex, mergeAnnex ]
|
, [ commitAnnex, mergeAnnex ]
|
||||||
]
|
]
|
||||||
seekActions $ return $ concat
|
|
||||||
[ [ withbranch pushLocal ]
|
void $ includeCommandAction $ withbranch pushLocal
|
||||||
, map (withbranch . pushRemote) gitremotes
|
-- Pushes to remotes can run concurrently.
|
||||||
]
|
mapM_ (commandAction . withbranch . pushRemote) gitremotes
|
||||||
|
|
||||||
{- Merging may delete the current directory, so go to the top
|
{- Merging may delete the current directory, so go to the top
|
||||||
- of the repo. This also means that sync always acts on all files in the
|
- of the repo. This also means that sync always acts on all files in the
|
||||||
|
@ -380,7 +382,9 @@ newer remote b = do
|
||||||
- This ensures that preferred content expressions that match on
|
- This ensures that preferred content expressions that match on
|
||||||
- filenames work, even when in --all mode.
|
- filenames work, even when in --all mode.
|
||||||
-
|
-
|
||||||
- If any file movements were generated, returns true.
|
- Returns true if any file transfers were made.
|
||||||
|
-
|
||||||
|
- When concurrency is enabled, files are processed concurrently.
|
||||||
-}
|
-}
|
||||||
seekSyncContent :: SyncOptions -> [Remote] -> Annex Bool
|
seekSyncContent :: SyncOptions -> [Remote] -> Annex Bool
|
||||||
seekSyncContent o rs = do
|
seekSyncContent o rs = do
|
||||||
|
@ -392,15 +396,17 @@ seekSyncContent o rs = do
|
||||||
(seekkeys mvar bloom)
|
(seekkeys mvar bloom)
|
||||||
(const noop)
|
(const noop)
|
||||||
[]
|
[]
|
||||||
|
finishCommandActions
|
||||||
liftIO $ not <$> isEmptyMVar mvar
|
liftIO $ not <$> isEmptyMVar mvar
|
||||||
where
|
where
|
||||||
seekworktree mvar l bloomfeeder = seekHelper LsFiles.inRepo l >>=
|
seekworktree mvar l bloomfeeder = seekHelper LsFiles.inRepo l >>=
|
||||||
mapM_ (\f -> ifAnnexed f (go (Right bloomfeeder) mvar (Just f)) noop)
|
mapM_ (\f -> ifAnnexed f (go (Right bloomfeeder) mvar (Just f)) noop)
|
||||||
seekkeys mvar bloom getkeys =
|
seekkeys mvar bloom getkeys =
|
||||||
mapM_ (go (Left bloom) mvar Nothing) =<< getkeys
|
mapM_ (go (Left bloom) mvar Nothing) =<< getkeys
|
||||||
go ebloom mvar af k = do
|
go ebloom mvar af k = commandAction $ do
|
||||||
void $ liftIO $ tryPutMVar mvar ()
|
whenM (syncFile ebloom rs af k) $
|
||||||
syncFile ebloom rs af k
|
void $ liftIO $ tryPutMVar mvar ()
|
||||||
|
return Nothing
|
||||||
|
|
||||||
{- If it's preferred content, and we don't have it, get it from one of the
|
{- If it's preferred content, and we don't have it, get it from one of the
|
||||||
- listed remotes (preferring the cheaper earlier ones).
|
- listed remotes (preferring the cheaper earlier ones).
|
||||||
|
@ -412,8 +418,10 @@ seekSyncContent o rs = do
|
||||||
-
|
-
|
||||||
- Drop it from each remote that has it, where it's not preferred content
|
- Drop it from each remote that has it, where it's not preferred content
|
||||||
- (honoring numcopies).
|
- (honoring numcopies).
|
||||||
|
-
|
||||||
|
- Returns True if any file transfers were made.
|
||||||
-}
|
-}
|
||||||
syncFile :: Either (Maybe (Bloom Key)) (Key -> Annex ()) -> [Remote] -> AssociatedFile -> Key -> Annex ()
|
syncFile :: Either (Maybe (Bloom Key)) (Key -> Annex ()) -> [Remote] -> AssociatedFile -> Key -> Annex Bool
|
||||||
syncFile ebloom rs af k = do
|
syncFile ebloom rs af k = do
|
||||||
locs <- loggedLocations k
|
locs <- loggedLocations k
|
||||||
let (have, lack) = partition (\r -> Remote.uuid r `elem` locs) rs
|
let (have, lack) = partition (\r -> Remote.uuid r `elem` locs) rs
|
||||||
|
@ -443,6 +451,8 @@ syncFile ebloom rs af k = do
|
||||||
-- the sync failed.
|
-- the sync failed.
|
||||||
handleDropsFrom locs' rs "unwanted" True k af
|
handleDropsFrom locs' rs "unwanted" True k af
|
||||||
Nothing callCommandAction
|
Nothing callCommandAction
|
||||||
|
|
||||||
|
return (got || not (null putrs))
|
||||||
where
|
where
|
||||||
wantget have = allM id
|
wantget have = allM id
|
||||||
[ pure (not $ null have)
|
[ pure (not $ null have)
|
||||||
|
|
3
debian/changelog
vendored
3
debian/changelog
vendored
|
@ -5,6 +5,9 @@ git-annex (5.20150813) UNRELEASED; urgency=medium
|
||||||
non-data-transfer overhead 6x.
|
non-data-transfer overhead 6x.
|
||||||
* --debug is passed along to git-annex-shell when git-annex is in debug mode.
|
* --debug is passed along to git-annex-shell when git-annex is in debug mode.
|
||||||
* Added WHEREIS to external special remote protocol.
|
* Added WHEREIS to external special remote protocol.
|
||||||
|
* sync: Support --jobs
|
||||||
|
* sync --content: Avoid unnecessary second pull from remotes when
|
||||||
|
no file transfers are made.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Wed, 12 Aug 2015 14:31:01 -0400
|
-- Joey Hess <id@joeyh.name> Wed, 12 Aug 2015 14:31:01 -0400
|
||||||
|
|
||||||
|
|
|
@ -65,6 +65,16 @@ by running "git annex sync" on the remote.
|
||||||
will only match the version of files currently in the work tree, but not
|
will only match the version of files currently in the work tree, but not
|
||||||
past versions of files.
|
past versions of files.
|
||||||
|
|
||||||
|
* `--jobs=N` `-JN`
|
||||||
|
|
||||||
|
Enables parallel syncing with up to the specified number of jobs
|
||||||
|
running at once. For example: `-J10`
|
||||||
|
|
||||||
|
When there are multiple git remotes, pushes will be made to them in
|
||||||
|
parallel. Pulls are not done in parallel because that tends to be
|
||||||
|
less efficient. When --content is synced, the files are processed
|
||||||
|
in parallel as well.
|
||||||
|
|
||||||
# SEE ALSO
|
# SEE ALSO
|
||||||
|
|
||||||
[[git-annex]](1)
|
[[git-annex]](1)
|
||||||
|
|
|
@ -1 +1,14 @@
|
||||||
As the subject says. I mostly use `git annex sync --content` to transfer files between repositories, as it's easier than running `git annex sync`, a bunch of `git annex copy`s and then a `git annex get` to make sure I have all the files I should have. It would be good if the shortcut could also work in parallel.
|
As the subject says. I mostly use `git annex sync --content` to transfer
|
||||||
|
files between repositories, as it's easier than running `git annex sync`, a
|
||||||
|
bunch of `git annex copy`s and then a `git annex get` to make sure I have
|
||||||
|
all the files I should have. It would be good if the shortcut could also
|
||||||
|
work in parallel.
|
||||||
|
|
||||||
|
> It can also be faster to push concurrently. OTOH, concurrent pulls
|
||||||
|
> can lead to the same git objects being downloaded redundantly, so best to
|
||||||
|
> avoid those I think.
|
||||||
|
>
|
||||||
|
> I've implemented this. It suffers from the same
|
||||||
|
> lack of support for displaying progress when running in parallel as
|
||||||
|
> documented on [[parallel_get]]. Other than that wart, this is [[done]].
|
||||||
|
> --[[Joey]]
|
||||||
|
|
Loading…
Add table
Reference in a new issue