sync: Support --jobs
* sync: Support --jobs
* sync --content: Avoid unnecessary second pull from remotes when no file transfers are made.

parent 4a5fe93248
commit 87b4229b23
4 changed files with 51 additions and 15 deletions
Command/Sync.hs

@@ -52,9 +52,10 @@ import Control.Concurrent.MVar
 import qualified Data.Map as M
 
 cmd :: Command
-cmd = command "sync" SectionCommon
-    "synchronize local repository with remotes"
-    (paramRepeating paramRemote) (seek <$$> optParser)
+cmd = withGlobalOptions [jobsOption] $
+    command "sync" SectionCommon
+        "synchronize local repository with remotes"
+        (paramRepeating paramRemote) (seek <$$> optParser)
 
 data SyncOptions = SyncOptions
     { syncWith :: CmdParams
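The --jobs support comes in by wrapping the sync command in git-annex's global `jobsOption`. As a rough standalone sketch (not git-annex's actual option plumbing), here is how a `--jobs`/`-J` flag defaulting to 1 can be declared with plain optparse-applicative; the `Opts` record, `optJobs`, and `optContent` are invented for the example:

```haskell
-- Toy parser illustrating a --jobs/-J flag; not git-annex's jobsOption.
import Options.Applicative

data Opts = Opts
    { optJobs :: Int      -- ^ number of concurrent jobs (-J N), default 1
    , optContent :: Bool  -- ^ whether to also sync file contents
    }

optsParser :: Parser Opts
optsParser = Opts
    <$> option auto
        ( long "jobs" <> short 'J' <> metavar "N"
       <> value 1 <> showDefault
       <> help "number of concurrent jobs" )
    <*> switch
        ( long "content" <> help "also transfer annexed content" )

main :: IO ()
main = do
    opts <- execParser $ info (optsParser <**> helper)
        (fullDesc <> progDesc "toy parser illustrating a --jobs flag")
    putStrLn $ "would run with up to " ++ show (optJobs opts) ++ " job(s)"
```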
@@ -102,7 +103,8 @@ seek o = do
 
     -- Syncing involves many actions, any of which can independently
     -- fail, without preventing the others from running.
-    seekActions $ return $ concat
+    -- These actions cannot be run concurrently.
+    mapM_ includeCommandAction $ concat
         [ [ commit o ]
         , [ withbranch mergeLocal ]
         , map (withbranch . pullRemote) gitremotes
@@ -115,14 +117,14 @@ seek o = do
             -- branch on the remotes in the meantime, so pull
             -- and merge again to avoid our push overwriting
             -- those changes.
-            seekActions $ return $ concat
+            mapM_ includeCommandAction $ concat
                 [ map (withbranch . pullRemote) gitremotes
                 , [ commitAnnex, mergeAnnex ]
                 ]
-    seekActions $ return $ concat
-        [ [ withbranch pushLocal ]
-        , map (withbranch . pushRemote) gitremotes
-        ]
+
+    void $ includeCommandAction $ withbranch pushLocal
+    -- Pushes to remotes can run concurrently.
+    mapM_ (commandAction . withbranch . pushRemote) gitremotes
 
 {- Merging may delete the current directory, so go to the top
  - of the repo. This also means that sync always acts on all files in the
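The reordering above keeps commits, merges, and pulls strictly sequential while queueing pushes to different remotes as concurrent command actions. A self-contained sketch of that ordering using `Control.Concurrent.Async` rather than git-annex's commandAction machinery; the `Remote` type and the pull/push stubs are placeholders, and unlike the real code this sketch does not cap concurrency at the -J value:

```haskell
import Control.Concurrent.Async (forConcurrently_)
import Control.Exception (SomeException, try)
import Control.Monad (forM_, void)

newtype Remote = Remote { remoteName :: String }

pullRemote :: Remote -> IO ()
pullRemote r = putStrLn ("pull from " ++ remoteName r)

pushRemote :: Remote -> IO ()
pushRemote r = putStrLn ("push to " ++ remoteName r)

-- Let a failure against one remote not stop work on the others.
attempt :: IO () -> IO ()
attempt a = void (try a :: IO (Either SomeException ()))

sync :: [Remote] -> IO ()
sync remotes = do
    -- Pulls stay sequential: running them concurrently tends to
    -- download the same git objects redundantly.
    forM_ remotes (attempt . pullRemote)
    -- Pushes to different remotes are independent, so run them at once.
    forConcurrently_ remotes (attempt . pushRemote)

main :: IO ()
main = sync [Remote "origin", Remote "backup"]
```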
@@ -380,7 +382,9 @@ newer remote b = do
  - This ensures that preferred content expressions that match on
  - filenames work, even when in --all mode.
  -
- - If any file movements were generated, returns true.
+ - Returns true if any file transfers were made.
+ -
+ - When concurrency is enabled, files are processed concurrently.
  -}
 seekSyncContent :: SyncOptions -> [Remote] -> Annex Bool
 seekSyncContent o rs = do
@@ -392,15 +396,17 @@ seekSyncContent o rs = do
         (seekkeys mvar bloom)
         (const noop)
         []
+    finishCommandActions
     liftIO $ not <$> isEmptyMVar mvar
   where
     seekworktree mvar l bloomfeeder = seekHelper LsFiles.inRepo l >>=
         mapM_ (\f -> ifAnnexed f (go (Right bloomfeeder) mvar (Just f)) noop)
     seekkeys mvar bloom getkeys =
         mapM_ (go (Left bloom) mvar Nothing) =<< getkeys
-    go ebloom mvar af k = do
-        void $ liftIO $ tryPutMVar mvar ()
-        syncFile ebloom rs af k
+    go ebloom mvar af k = commandAction $ do
+        whenM (syncFile ebloom rs af k) $
+            void $ liftIO $ tryPutMVar mvar ()
+        return Nothing
 
 {- If it's preferred content, and we don't have it, get it from one of the
  - listed remotes (preferring the cheaper earlier ones).
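The `mvar` threaded through `seekSyncContent` works as a write-once flag: a worker calls `tryPutMVar mvar ()` only when `syncFile` reports that a transfer actually happened, and checking `isEmptyMVar` afterwards tells the caller whether anything moved at all. A minimal standalone sketch of the same trick, with a made-up `transfer` stub standing in for `syncFile`:

```haskell
import Control.Concurrent.MVar
import Control.Monad (forM_, void, when)

-- Pretend transfer: returns True only when it actually moved a file.
transfer :: FilePath -> IO Bool
transfer f = do
    let moved = f == "big.iso"  -- only this one "needs" a transfer
    when moved $ putStrLn ("transferred " ++ f)
    return moved

main :: IO ()
main = do
    flag <- newEmptyMVar
    forM_ ["a.txt", "big.iso", "b.txt"] $ \f -> do
        did <- transfer f
        when did $
            void $ tryPutMVar flag ()  -- record it; never blocks
    anyTransfers <- not <$> isEmptyMVar flag
    putStrLn $ "transfers made: " ++ show anyTransfers
```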
@@ -412,8 +418,10 @@ seekSyncContent o rs = do
  -
  - Drop it from each remote that has it, where it's not preferred content
  - (honoring numcopies).
+ -
+ - Returns True if any file transfers were made.
  -}
-syncFile :: Either (Maybe (Bloom Key)) (Key -> Annex ()) -> [Remote] -> AssociatedFile -> Key -> Annex ()
+syncFile :: Either (Maybe (Bloom Key)) (Key -> Annex ()) -> [Remote] -> AssociatedFile -> Key -> Annex Bool
 syncFile ebloom rs af k = do
     locs <- loggedLocations k
     let (have, lack) = partition (\r -> Remote.uuid r `elem` locs) rs
@@ -443,6 +451,8 @@ syncFile ebloom rs af k = do
         -- the sync failed.
         handleDropsFrom locs' rs "unwanted" True k af
             Nothing callCommandAction
+
+    return (got || not (null putrs))
   where
     wantget have = allM id
         [ pure (not $ null have)
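With `syncFile` and `seekSyncContent` now reporting a `Bool`, the caller can tell whether any content transfers happened and skip the otherwise-unconditional second pull from the remotes, which is the second item in the commit message. A hedged sketch of that control flow; the function names below are stand-ins, not the actual `seek` code:

```haskell
import Control.Monad (when)

-- Stand-in for seekSyncContent: True when any file transfers were made.
syncContent :: IO Bool
syncContent = return False  -- pretend nothing needed transferring

pullAndMergeAgain :: IO ()
pullAndMergeAgain = putStrLn "second pull/merge round"

main :: IO ()
main = do
    transfersMade <- syncContent
    -- Avoid the unnecessary second pull when no file transfers were made.
    when transfersMade pullAndMergeAgain
    putStrLn "sync finished"
```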
debian/changelog

@@ -5,6 +5,9 @@ git-annex (5.20150813) UNRELEASED; urgency=medium
     non-data-transfer overhead 6x.
   * --debug is passed along to git-annex-shell when git-annex is in debug mode.
   * Added WHEREIS to external special remote protocol.
+  * sync: Support --jobs
+  * sync --content: Avoid unnecessary second pull from remotes when
+    no file transfers are made.
 
  -- Joey Hess <id@joeyh.name>  Wed, 12 Aug 2015 14:31:01 -0400
 
doc/git-annex-sync.mdwn

@@ -65,6 +65,16 @@ by running "git annex sync" on the remote.
   will only match the version of files currently in the work tree, but not
   past versions of files.
 
+* `--jobs=N` `-JN`
+
+  Enables parallel syncing with up to the specified number of jobs
+  running at once. For example: `-J10`
+
+  When there are multiple git remotes, pushes will be made to them in
+  parallel. Pulls are not done in parallel because that tends to be
+  less efficient. When --content is synced, the files are processed
+  in parallel as well.
+
 # SEE ALSO
 
 [[git-annex]](1)
@@ -1 +1,14 @@
-As the subject says. I mostly use `git annex sync --content` to transfer files between repositories, as its easier than running `git annex sync`, a bunch of `git annex copy`s and then a `git annex get` to make sure I have all the files I should have. It would be good if the shortcut could also work in parallel.
+As the subject says. I mostly use `git annex sync --content` to transfer
+files between repositories, as its easier than running `git annex sync`, a
+bunch of `git annex copy`s and then a `git annex get` to make sure I have
+all the files I should have. It would be good if the shortcut could also
+work in parallel.
+
+> It also can be faster to push concurrent. OTOH, concurrent pulls
+> can lead to the same git objects being downloaded redundantly, so best to
+> avoid those I think.
+>
+> I've implemented this. It suffers from the same
+> lack of support for displaying progress when running it parallel as
+> documented on [[parallel_get]]. Other than that wart, this is [[done]].
+> --[[Joey]]