468 lines
14 KiB
Haskell
468 lines
14 KiB
Haskell
{- git-annex command
|
|
-
|
|
- Copyright 2011 Joachim Breitner <mail@joachim-breitner.de>
|
|
- Copyright 2011-2014 Joey Hess <id@joeyh.name>
|
|
-
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
-}
|
|
|
|
module Command.Sync (
|
|
cmd,
|
|
prepMerge,
|
|
mergeLocal,
|
|
mergeRemote,
|
|
commitStaged,
|
|
commitMsg,
|
|
pushBranch,
|
|
updateBranch,
|
|
syncBranch,
|
|
updateSyncBranch,
|
|
) where
|
|
|
|
import Common.Annex
|
|
import Command
|
|
import qualified Annex
|
|
import qualified Annex.Branch
|
|
import qualified Remote
|
|
import qualified Types.Remote as Remote
|
|
import Annex.Direct
|
|
import Annex.Hook
|
|
import qualified Git.Command
|
|
import qualified Git.LsFiles as LsFiles
|
|
import qualified Git.Branch
|
|
import qualified Git.Types as Git
|
|
import qualified Git.Ref
|
|
import qualified Git
|
|
import qualified Remote.Git
|
|
import Config
|
|
import Annex.Wanted
|
|
import Annex.Content
|
|
import Command.Get (getKeyFile')
|
|
import qualified Command.Move
|
|
import Logs.Location
|
|
import Annex.Drop
|
|
import Annex.UUID
|
|
import Logs.UUID
|
|
import Annex.AutoMerge
|
|
import Annex.Ssh
|
|
import Annex.BloomFilter
|
|
import Utility.Bloom
|
|
|
|
import Control.Concurrent.MVar
|
|
import qualified Data.Map as M
|
|
|
|
cmd :: Command
|
|
cmd = command "sync" SectionCommon
|
|
"synchronize local repository with remotes"
|
|
(paramRepeating paramRemote) (seek <$$> optParser)
|
|
|
|
data SyncOptions = SyncOptions
|
|
{ syncWith :: CmdParams
|
|
, contentOption :: Bool
|
|
, messageOption :: Maybe String
|
|
, keyOptions :: Maybe KeyOptions
|
|
}
|
|
|
|
optParser :: CmdParamsDesc -> Parser SyncOptions
|
|
optParser desc = SyncOptions
|
|
<$> cmdParams desc
|
|
<*> switch
|
|
( long "content"
|
|
<> help "also transfer file contents"
|
|
)
|
|
<*> optional (strOption
|
|
( long "message" <> short 'm' <> metavar "MSG"
|
|
<> help "commit message"
|
|
))
|
|
<*> optional parseAllOption
|
|
|
|
seek :: SyncOptions -> CommandSeek
|
|
seek o = do
|
|
prepMerge
|
|
|
|
-- There may not be a branch checked out until after the commit,
|
|
-- or perhaps after it gets merged from the remote, or perhaps
|
|
-- never.
|
|
-- So only look it up once it's needed, and once there is a
|
|
-- branch, cache it.
|
|
mvar <- liftIO newEmptyMVar
|
|
let getbranch = ifM (liftIO $ isEmptyMVar mvar)
|
|
( do
|
|
branch <- inRepo Git.Branch.current
|
|
when (isJust branch) $
|
|
liftIO $ putMVar mvar branch
|
|
return branch
|
|
, liftIO $ readMVar mvar
|
|
)
|
|
let withbranch a = a =<< getbranch
|
|
|
|
remotes <- syncRemotes (syncWith o)
|
|
let gitremotes = filter Remote.gitSyncableRemote remotes
|
|
let dataremotes = filter (not . remoteAnnexIgnore . Remote.gitconfig) remotes
|
|
|
|
-- Syncing involves many actions, any of which can independently
|
|
-- fail, without preventing the others from running.
|
|
seekActions $ return $ concat
|
|
[ [ commit o ]
|
|
, [ withbranch mergeLocal ]
|
|
, map (withbranch . pullRemote) gitremotes
|
|
, [ mergeAnnex ]
|
|
]
|
|
when (contentOption o) $
|
|
whenM (seekSyncContent o dataremotes) $
|
|
-- Transferring content can take a while,
|
|
-- and other changes can be pushed to the git-annex
|
|
-- branch on the remotes in the meantime, so pull
|
|
-- and merge again to avoid our push overwriting
|
|
-- those changes.
|
|
seekActions $ return $ concat
|
|
[ map (withbranch . pullRemote) gitremotes
|
|
, [ commitAnnex, mergeAnnex ]
|
|
]
|
|
seekActions $ return $ concat
|
|
[ [ withbranch pushLocal ]
|
|
, map (withbranch . pushRemote) gitremotes
|
|
]
|
|
|
|
{- Merging may delete the current directory, so go to the top
|
|
- of the repo. This also means that sync always acts on all files in the
|
|
- repository, not just on a subdirectory. -}
|
|
prepMerge :: Annex ()
|
|
prepMerge = Annex.changeDirectory =<< fromRepo Git.repoPath
|
|
|
|
syncBranch :: Git.Ref -> Git.Ref
|
|
syncBranch = Git.Ref.under "refs/heads/synced" . fromDirectBranch
|
|
|
|
remoteBranch :: Remote -> Git.Ref -> Git.Ref
|
|
remoteBranch remote = Git.Ref.underBase $ "refs/remotes/" ++ Remote.name remote
|
|
|
|
syncRemotes :: [String] -> Annex [Remote]
|
|
syncRemotes rs = ifM (Annex.getState Annex.fast) ( nub <$> pickfast , wanted )
|
|
where
|
|
pickfast = (++) <$> listed <*> (filterM good =<< fastest <$> available)
|
|
|
|
wanted
|
|
| null rs = filterM good =<< concat . Remote.byCost <$> available
|
|
| otherwise = listed
|
|
|
|
listed = concat <$> mapM Remote.byNameOrGroup rs
|
|
|
|
available = filter (remoteAnnexSync . Remote.gitconfig)
|
|
. filter (not . Remote.isXMPPRemote)
|
|
<$> Remote.remoteList
|
|
|
|
good r
|
|
| Remote.gitSyncableRemote r = Remote.Git.repoAvail $ Remote.repo r
|
|
| otherwise = return True
|
|
|
|
fastest = fromMaybe [] . headMaybe . Remote.byCost
|
|
|
|
commit :: SyncOptions -> CommandStart
|
|
commit o = ifM (annexAutoCommit <$> Annex.getGitConfig)
|
|
( go
|
|
, stop
|
|
)
|
|
where
|
|
go = next $ next $ do
|
|
commitmessage <- maybe commitMsg return (messageOption o)
|
|
showStart "commit" ""
|
|
Annex.Branch.commit "update"
|
|
ifM isDirect
|
|
( do
|
|
void stageDirect
|
|
void preCommitDirect
|
|
commitStaged Git.Branch.ManualCommit commitmessage
|
|
, do
|
|
inRepo $ Git.Branch.commitQuiet Git.Branch.ManualCommit
|
|
[ Param "-a"
|
|
, Param "-m"
|
|
, Param commitmessage
|
|
]
|
|
return True
|
|
)
|
|
|
|
commitMsg :: Annex String
|
|
commitMsg = do
|
|
u <- getUUID
|
|
m <- uuidMap
|
|
return $ "git-annex in " ++ fromMaybe "unknown" (M.lookup u m)
|
|
|
|
commitStaged :: Git.Branch.CommitMode -> String -> Annex Bool
|
|
commitStaged commitmode commitmessage = do
|
|
runAnnexHook preCommitAnnexHook
|
|
mb <- inRepo Git.Branch.currentUnsafe
|
|
let (getparent, branch) = case mb of
|
|
Just b -> (Git.Ref.sha b, b)
|
|
Nothing -> (Git.Ref.headSha, Git.Ref.headRef)
|
|
parents <- maybeToList <$> inRepo getparent
|
|
void $ inRepo $ Git.Branch.commit commitmode False commitmessage branch parents
|
|
return True
|
|
|
|
mergeLocal :: Maybe Git.Ref -> CommandStart
|
|
mergeLocal Nothing = stop
|
|
mergeLocal (Just branch) = go =<< needmerge
|
|
where
|
|
syncbranch = syncBranch branch
|
|
needmerge = ifM isBareRepo
|
|
( return False
|
|
, ifM (inRepo $ Git.Ref.exists syncbranch)
|
|
( inRepo $ Git.Branch.changed branch syncbranch
|
|
, return False
|
|
)
|
|
)
|
|
go False = stop
|
|
go True = do
|
|
showStart "merge" $ Git.Ref.describe syncbranch
|
|
next $ next $ autoMergeFrom syncbranch (Just branch) Git.Branch.ManualCommit
|
|
|
|
pushLocal :: Maybe Git.Ref -> CommandStart
|
|
pushLocal b = do
|
|
updateSyncBranch b
|
|
stop
|
|
|
|
updateSyncBranch :: Maybe Git.Ref -> Annex ()
|
|
updateSyncBranch Nothing = noop
|
|
updateSyncBranch (Just branch) = do
|
|
-- Update the sync branch to match the new state of the branch
|
|
inRepo $ updateBranch $ syncBranch branch
|
|
-- In direct mode, we're operating on some special direct mode
|
|
-- branch, rather than the intended branch, so update the indended
|
|
-- branch.
|
|
whenM isDirect $
|
|
inRepo $ updateBranch $ fromDirectBranch branch
|
|
|
|
updateBranch :: Git.Ref -> Git.Repo -> IO ()
|
|
updateBranch syncbranch g =
|
|
unlessM go $ error $ "failed to update " ++ Git.fromRef syncbranch
|
|
where
|
|
go = Git.Command.runBool
|
|
[ Param "branch"
|
|
, Param "-f"
|
|
, Param $ Git.fromRef $ Git.Ref.base syncbranch
|
|
] g
|
|
|
|
pullRemote :: Remote -> Maybe Git.Ref -> CommandStart
|
|
pullRemote remote branch = do
|
|
showStart "pull" (Remote.name remote)
|
|
next $ do
|
|
showOutput
|
|
stopUnless fetch $
|
|
next $ mergeRemote remote branch
|
|
where
|
|
fetch = inRepoWithSshOptionsTo (Remote.repo remote) (Remote.gitconfig remote) $
|
|
Git.Command.runBool
|
|
[Param "fetch", Param $ Remote.name remote]
|
|
|
|
{- The remote probably has both a master and a synced/master branch.
|
|
- Which to merge from? Well, the master has whatever latest changes
|
|
- were committed (or pushed changes, if this is a bare remote),
|
|
- while the synced/master may have changes that some
|
|
- other remote synced to this remote. So, merge them both. -}
|
|
mergeRemote :: Remote -> Maybe Git.Ref -> CommandCleanup
|
|
mergeRemote remote b = ifM isBareRepo
|
|
( return True
|
|
, case b of
|
|
Nothing -> do
|
|
branch <- inRepo Git.Branch.currentUnsafe
|
|
and <$> mapM (merge Nothing) (branchlist branch)
|
|
Just thisbranch -> do
|
|
inRepo $ updateBranch $ syncBranch thisbranch
|
|
and <$> (mapM (merge (Just thisbranch)) =<< tomerge (branchlist b))
|
|
)
|
|
where
|
|
merge thisbranch br = autoMergeFrom (remoteBranch remote br) thisbranch Git.Branch.ManualCommit
|
|
tomerge = filterM (changed remote)
|
|
branchlist Nothing = []
|
|
branchlist (Just branch) = [branch, syncBranch branch]
|
|
|
|
pushRemote :: Remote -> Maybe Git.Ref -> CommandStart
|
|
pushRemote _remote Nothing = stop
|
|
pushRemote remote (Just branch) = go =<< needpush
|
|
where
|
|
needpush
|
|
| remoteAnnexReadOnly (Remote.gitconfig remote) = return False
|
|
| otherwise = anyM (newer remote) [syncBranch branch, Annex.Branch.name]
|
|
go False = stop
|
|
go True = do
|
|
showStart "push" (Remote.name remote)
|
|
next $ next $ do
|
|
showOutput
|
|
ok <- inRepoWithSshOptionsTo (Remote.repo remote) (Remote.gitconfig remote) $
|
|
pushBranch remote branch
|
|
unless ok $ do
|
|
warning $ unwords [ "Pushing to " ++ Remote.name remote ++ " failed." ]
|
|
showLongNote "(non-fast-forward problems can be solved by setting receive.denyNonFastforwards to false in the remote's git config)"
|
|
return ok
|
|
|
|
{- Pushes a regular branch like master to a remote. Also pushes the git-annex
|
|
- branch.
|
|
-
|
|
- If the remote is a bare git repository, it's best to push the regular
|
|
- branch directly to it, so that cloning/pulling will get it.
|
|
- On the other hand, if it's not bare, pushing to the checked out branch
|
|
- will fail, and this is why we push to its syncBranch.
|
|
-
|
|
- Git offers no way to tell if a remote is bare or not, so both methods
|
|
- are tried.
|
|
-
|
|
- The direct push is likely to spew an ugly error message, so stderr is
|
|
- elided. Since git progress display goes to stderr too, the sync push
|
|
- is done first, and actually sends the data. Then the direct push is
|
|
- tried, with stderr discarded, to update the branch ref on the remote.
|
|
-
|
|
- The sync push forces the update of the remote synced/git-annex branch.
|
|
- This is necessary if a transition has rewritten the git-annex branch.
|
|
- Normally any changes to the git-annex branch get pulled and merged before
|
|
- this push, so this forcing is unlikely to overwrite new data pushed
|
|
- in from another repository that is also syncing.
|
|
-
|
|
- But overwriting of data on synced/git-annex can happen, in a race.
|
|
- The only difference caused by using a forced push in that case is that
|
|
- the last repository to push wins the race, rather than the first to push.
|
|
-
|
|
- The sync push will fail to overwrite if receive.denyNonFastforwards is
|
|
- set on the remote.
|
|
-}
|
|
pushBranch :: Remote -> Git.Ref -> Git.Repo -> IO Bool
|
|
pushBranch remote branch g = tryIO (directpush g) `after` syncpush g
|
|
where
|
|
syncpush = Git.Command.runBool $ pushparams
|
|
[ Git.Branch.forcePush $ refspec Annex.Branch.name
|
|
, refspec branch
|
|
]
|
|
directpush = Git.Command.runQuiet $ pushparams
|
|
[ Git.fromRef $ Git.Ref.base $ Annex.Branch.name
|
|
, Git.fromRef $ Git.Ref.base $ fromDirectBranch branch
|
|
]
|
|
pushparams branches =
|
|
[ Param "push"
|
|
, Param $ Remote.name remote
|
|
] ++ map Param branches
|
|
refspec b = concat
|
|
[ Git.fromRef $ Git.Ref.base b
|
|
, ":"
|
|
, Git.fromRef $ Git.Ref.base $ syncBranch b
|
|
]
|
|
|
|
commitAnnex :: CommandStart
|
|
commitAnnex = do
|
|
Annex.Branch.commit "update"
|
|
stop
|
|
|
|
mergeAnnex :: CommandStart
|
|
mergeAnnex = do
|
|
void Annex.Branch.forceUpdate
|
|
stop
|
|
|
|
changed :: Remote -> Git.Ref -> Annex Bool
|
|
changed remote b = do
|
|
let r = remoteBranch remote b
|
|
ifM (inRepo $ Git.Ref.exists r)
|
|
( inRepo $ Git.Branch.changed b r
|
|
, return False
|
|
)
|
|
|
|
newer :: Remote -> Git.Ref -> Annex Bool
|
|
newer remote b = do
|
|
let r = remoteBranch remote b
|
|
ifM (inRepo $ Git.Ref.exists r)
|
|
( inRepo $ Git.Branch.changed r b
|
|
, return True
|
|
)
|
|
|
|
{- Without --all, only looks at files in the work tree. With --all,
|
|
- makes 2 passes, first looking at the work tree and then all keys.
|
|
- This ensures that preferred content expressions that match on
|
|
- filenames work, even when in --all mode.
|
|
-
|
|
- If any file movements were generated, returns true.
|
|
-}
|
|
seekSyncContent :: SyncOptions -> [Remote] -> Annex Bool
|
|
seekSyncContent o rs = do
|
|
mvar <- liftIO newEmptyMVar
|
|
bloom <- case keyOptions o of
|
|
Just WantAllKeys -> Just <$> genBloomFilter (seekworktree mvar [])
|
|
_ -> seekworktree mvar [] (const noop) >> pure Nothing
|
|
withKeyOptions' (keyOptions o) False
|
|
(seekkeys mvar bloom)
|
|
(const noop)
|
|
[]
|
|
liftIO $ not <$> isEmptyMVar mvar
|
|
where
|
|
seekworktree mvar l bloomfeeder = seekHelper LsFiles.inRepo l >>=
|
|
mapM_ (\f -> ifAnnexed f (go (Right bloomfeeder) mvar (Just f)) noop)
|
|
seekkeys mvar bloom getkeys =
|
|
mapM_ (go (Left bloom) mvar Nothing) =<< getkeys
|
|
go ebloom mvar af k = do
|
|
void $ liftIO $ tryPutMVar mvar ()
|
|
syncFile ebloom rs af k
|
|
|
|
{- If it's preferred content, and we don't have it, get it from one of the
|
|
- listed remotes (preferring the cheaper earlier ones).
|
|
-
|
|
- Send it to each remote that doesn't have it, and for which it's
|
|
- preferred content.
|
|
-
|
|
- Drop it locally if it's not preferred content (honoring numcopies).
|
|
-
|
|
- Drop it from each remote that has it, where it's not preferred content
|
|
- (honoring numcopies).
|
|
-}
|
|
syncFile :: Either (Maybe (Bloom Key)) (Key -> Annex ()) -> [Remote] -> AssociatedFile -> Key -> Annex ()
|
|
syncFile ebloom rs af k = do
|
|
locs <- loggedLocations k
|
|
let (have, lack) = partition (\r -> Remote.uuid r `elem` locs) rs
|
|
|
|
got <- anyM id =<< handleget have
|
|
putrs <- handleput lack
|
|
|
|
u <- getUUID
|
|
let locs' = concat [[u | got], putrs, locs]
|
|
|
|
-- A bloom filter is populated with all the keys in the first pass.
|
|
-- On the second pass, avoid dropping keys that were seen in the
|
|
-- first pass, which would happen otherwise when preferred content
|
|
-- matches on the filename, which is not available in the second
|
|
-- pass.
|
|
--
|
|
-- When there's a false positive in the bloom filter, the result
|
|
-- is keeping a key that preferred content doesn't really want.
|
|
seenbloom <- case ebloom of
|
|
Left Nothing -> pure False
|
|
Left (Just bloom) -> pure (elemB k bloom)
|
|
Right bloomfeeder -> bloomfeeder k >> return False
|
|
unless seenbloom $
|
|
-- Using callCommandAction rather than
|
|
-- includeCommandAction for drops,
|
|
-- because a failure to drop does not mean
|
|
-- the sync failed.
|
|
handleDropsFrom locs' rs "unwanted" True k af
|
|
Nothing callCommandAction
|
|
where
|
|
wantget have = allM id
|
|
[ pure (not $ null have)
|
|
, not <$> inAnnex k
|
|
, wantGet True (Just k) af
|
|
]
|
|
handleget have = ifM (wantget have)
|
|
( return [ get have ]
|
|
, return []
|
|
)
|
|
get have = includeCommandAction $ do
|
|
showStart' "get" k af
|
|
next $ next $ getViaTmp k $ \dest -> getKeyFile' k af dest have
|
|
|
|
wantput r
|
|
| Remote.readonly r || remoteAnnexReadOnly (Remote.gitconfig r) = return False
|
|
| otherwise = wantSend True (Just k) af (Remote.uuid r)
|
|
handleput lack = catMaybes <$> ifM (inAnnex k)
|
|
( forM lack $ \r ->
|
|
ifM (wantput r <&&> put r)
|
|
( return (Just (Remote.uuid r))
|
|
, return Nothing
|
|
)
|
|
, return []
|
|
)
|
|
put dest = includeCommandAction $ do
|
|
showStart' "copy" k af
|
|
Command.Move.toStart' dest False af k
|