git-annex/Remotes.hs

325 lines
10 KiB
Haskell
Raw Normal View History

2010-10-27 20:53:54 +00:00
{- git-annex remote repositories
-
- Copyright 2010 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Remotes (
2010-10-14 06:41:54 +00:00
list,
readConfigs,
keyPossibilities,
2010-10-23 18:14:36 +00:00
inAnnex,
same,
byName,
2010-10-23 18:58:14 +00:00
copyFromRemote,
copyToRemote,
2011-03-05 19:31:46 +00:00
onRemote
) where
2010-11-22 21:51:55 +00:00
import Control.Exception.Extensible
2010-10-14 01:28:47 +00:00
import Control.Monad.State (liftIO)
2010-10-14 02:59:43 +00:00
import qualified Data.Map as Map
2010-10-14 03:18:58 +00:00
import Data.String.Utils
2010-12-31 19:46:33 +00:00
import System.Cmd.Utils
2010-12-29 20:21:38 +00:00
import Data.List (intersect, sortBy)
2010-11-22 21:51:55 +00:00
import Control.Monad (when, unless, filterM)
2010-10-16 20:20:49 +00:00
2010-10-14 07:18:11 +00:00
import Types
2010-10-14 06:36:41 +00:00
import qualified GitRepo as Git
2010-10-14 07:18:11 +00:00
import qualified Annex
import LocationLog
2010-10-14 03:18:58 +00:00
import Locations
import UUID
import Trust
2010-10-23 18:14:36 +00:00
import Utility
import qualified Content
2010-11-08 19:15:21 +00:00
import Messages
import CopyFile
import RsyncFile
import Ssh
{- Human visible list of remotes. -}
2010-10-14 06:41:54 +00:00
list :: [Git.Repo] -> String
2010-10-22 19:21:23 +00:00
list remotes = join ", " $ map Git.repoDescribe remotes
2011-01-04 21:20:35 +00:00
{- The git configs for the git repo's remotes is not read on startup
- because reading it may be expensive. This function tries to read the
- config for a specified remote, and updates state. If successful, it
- returns the updated git repo. -}
tryGitConfigRead :: Git.Repo -> Annex (Either Git.Repo Git.Repo)
tryGitConfigRead r
| not $ Map.null $ Git.configMap r = return $ Right r -- already read
| Git.repoIsSsh r = store $ onRemote r (pipedconfig, r) "configlist" []
| Git.repoIsUrl r = return $ Left r
| otherwise = store $ safely $ Git.configRead r
where
-- Reading config can fail due to IO error or
-- for other reasons; catch all possible exceptions.
safely a = do
result <- liftIO (try (a)::IO (Either SomeException Git.Repo))
case result of
Left _ -> return r
Right r' -> return r'
pipedconfig cmd params = safely $
2011-02-28 20:25:31 +00:00
pOpen ReadFromPipe cmd (toCommand params) $
2011-01-04 21:20:35 +00:00
Git.hConfigRead r
store a = do
r' <- a
g <- Annex.gitRepo
let l = Git.remotes g
let g' = Git.remotesAdd g $ exchange l r'
Annex.changeState $ \s -> s { Annex.repo = g' }
2011-01-04 21:20:35 +00:00
return $ Right r'
exchange [] _ = []
exchange (old:ls) new =
if Git.repoRemoteName old == Git.repoRemoteName new
then new : exchange ls new
else old : exchange ls new
{- Reads the configs of all remotes.
2011-01-04 21:15:39 +00:00
-
- This has to be called before things that rely on eg, the UUID of
- remotes. Most such things will take care of running this themselves.
-
- As reading the config of remotes can be expensive, this
- function will only read configs once per git-annex run. It's
- assumed to be cheap to read the config of non-URL remotes,
- so this is done each time git-annex is run. Conversely,
- the config of an URL remote is only read when there is no
- cached UUID value.
- -}
2011-01-04 21:20:35 +00:00
readConfigs :: Annex ()
readConfigs = do
remotesread <- Annex.getState Annex.remotesread
2011-01-04 21:15:39 +00:00
unless remotesread $ do
g <- Annex.gitRepo
2011-01-05 02:14:24 +00:00
allremotes <- filterM repoNotIgnored $ Git.remotes g
2011-01-04 21:15:39 +00:00
let cheap = filter (not . Git.repoIsUrl) allremotes
let expensive = filter Git.repoIsUrl allremotes
doexpensive <- filterM cachedUUID expensive
unless (null doexpensive) $
showNote $ "getting UUID for " ++
list doexpensive ++ "..."
let todo = cheap ++ doexpensive
unless (null todo) $ do
2011-01-31 17:52:11 +00:00
mapM_ tryGitConfigRead todo
Annex.changeState $ \s -> s { Annex.remotesread = True }
2011-01-04 21:15:39 +00:00
where
cachedUUID r = do
u <- getUUID r
return $ null u
2010-12-29 20:21:38 +00:00
{- Cost ordered lists of remotes that the LocationLog indicate may have a key.
-
- Also returns a list of UUIDs that are trusted to have the key
- (some may not have configured remotes).
-}
keyPossibilities :: Key -> Annex ([Git.Repo], [UUID])
keyPossibilities key = do
2011-01-04 21:20:35 +00:00
readConfigs
2011-01-04 21:15:39 +00:00
2010-10-14 03:18:58 +00:00
allremotes <- remotesByCost
2011-01-04 21:15:39 +00:00
g <- Annex.gitRepo
u <- getUUID g
trusted <- trustGet Trusted
2011-01-04 21:15:39 +00:00
-- get uuids of all repositories that are recorded to have the key
2011-01-04 21:15:39 +00:00
uuids <- liftIO $ keyLocations g key
let validuuids = filter (/= u) uuids
-- note that validuuids is assumed to not have dups
let validtrusteduuids = intersect validuuids trusted
-- remotes that match uuids that have the key
validremotes <- reposByUUID allremotes validuuids
return (validremotes, validtrusteduuids)
{- Checks if a given remote has the content for a key inAnnex.
- If the remote cannot be accessed, returns a Left error.
-}
inAnnex :: Git.Repo -> Key -> Annex (Either IOException Bool)
2010-11-22 21:51:55 +00:00
inAnnex r key = if Git.repoIsUrl r
then checkremote
else liftIO (try checklocal ::IO (Either IOException Bool))
where
checklocal = do
2010-12-31 19:46:33 +00:00
-- run a local check inexpensively,
-- by making an Annex monad using the remote
a <- Annex.new r []
Annex.eval a (Content.inAnnex key)
checkremote = do
2010-11-08 19:15:21 +00:00
showNote ("checking " ++ Git.repoDescribe r ++ "...")
2010-12-31 19:52:59 +00:00
inannex <- onRemote r (boolSystem, False) "inannex"
[Param (show key)]
return $ Right inannex
{- Cost Ordered list of remotes. -}
2010-10-14 06:36:41 +00:00
remotesByCost :: Annex [Git.Repo]
2010-10-14 01:28:47 +00:00
remotesByCost = do
2010-10-14 07:18:11 +00:00
g <- Annex.gitRepo
2010-10-14 06:36:41 +00:00
reposByCost $ Git.remotes g
{- Orders a list of git repos by cost. Throws out ignored ones. -}
2010-10-14 06:36:41 +00:00
reposByCost :: [Git.Repo] -> Annex [Git.Repo]
2010-10-14 01:28:47 +00:00
reposByCost l = do
notignored <- filterM repoNotIgnored l
costpairs <- mapM costpair notignored
2010-11-22 21:51:55 +00:00
return $ fst $ unzip $ sortBy cmpcost costpairs
where
2010-10-14 01:28:47 +00:00
costpair r = do
cost <- repoCost r
return (r, cost)
2010-11-22 21:51:55 +00:00
cmpcost (_, c1) (_, c2) = compare c1 c2
{- Calculates cost for a repo.
-
- The default cost is 100 for local repositories, and 200 for remote
- repositories; it can also be configured by remote.<name>.annex-cost
-}
2010-10-14 06:36:41 +00:00
repoCost :: Git.Repo -> Annex Int
2010-10-14 01:28:47 +00:00
repoCost r = do
2011-03-05 19:31:46 +00:00
cost <- Annex.repoConfig r "cost" ""
2010-11-22 21:51:55 +00:00
if not $ null cost
then return $ read cost
2010-11-22 21:51:55 +00:00
else if Git.repoIsUrl r
then return 200
else return 100
2010-10-14 02:59:43 +00:00
{- Checks if a repo should be ignored, based either on annex-ignore
- setting, or on command-line options. Allows command-line to override
- annex-ignore. -}
repoNotIgnored :: Git.Repo -> Annex Bool
repoNotIgnored r = do
2011-03-05 19:31:46 +00:00
ignored <- Annex.repoConfig r "ignore" "false"
to <- match Annex.toremote
from <- match Annex.fromremote
if to || from
then return True
2010-11-22 21:51:55 +00:00
else return $ not $ Git.configTrue ignored
where
match a = do
name <- Annex.getState a
case name of
Nothing -> return False
n -> return $ n == Git.repoRemoteName r
{- Checks if two repos are the same, by comparing their remote names. -}
same :: Git.Repo -> Git.Repo -> Bool
same a b = Git.repoRemoteName a == Git.repoRemoteName b
2011-03-03 21:21:00 +00:00
{- Looks up a remote by name. (Or by UUID.) -}
byName :: String -> Annex Git.Repo
2011-01-26 20:20:28 +00:00
byName "." = Annex.gitRepo -- special case to refer to current repository
byName name = do
2010-10-28 18:20:02 +00:00
when (null name) $ error "no remote specified"
g <- Annex.gitRepo
2011-03-03 21:21:00 +00:00
match <- filterM matching $ Git.remotes g
2010-10-28 18:20:02 +00:00
when (null match) $ error $
"there is no git remote named \"" ++ name ++ "\""
2010-11-22 21:51:55 +00:00
return $ head match
2011-03-03 21:21:00 +00:00
where
matching r = do
if Just name == Git.repoRemoteName r
then return True
else do
u <- getUUID r
return $ (name == u)
2010-10-23 18:14:36 +00:00
{- Tries to copy a key's content from a remote's annex to a file. -}
2010-10-23 18:14:36 +00:00
copyFromRemote :: Git.Repo -> Key -> FilePath -> Annex Bool
2010-11-22 21:51:55 +00:00
copyFromRemote r key file
| not $ Git.repoIsUrl r = liftIO $ copyFile (gitAnnexLocation r key) file
| Git.repoIsSsh r = rsynchelper r True key file
2010-11-22 21:51:55 +00:00
| otherwise = error "copying from non-ssh repo not supported"
2010-10-23 18:58:14 +00:00
{- Tries to copy a key's content to a remote's annex. -}
copyToRemote :: Git.Repo -> Key -> Annex Bool
copyToRemote r key
| not $ Git.repoIsUrl r = do
g <- Annex.gitRepo
let keysrc = gitAnnexLocation g key
-- run copy from perspective of remote
liftIO $ do
a <- Annex.new r []
Annex.eval a $ do
ok <- Content.getViaTmp key $
\f -> liftIO $ copyFile keysrc f
Annex.queueRun
return ok
| Git.repoIsSsh r = do
g <- Annex.gitRepo
let keysrc = gitAnnexLocation g key
rsynchelper r False key keysrc
| otherwise = error "copying to non-ssh repo not supported"
rsynchelper :: Git.Repo -> Bool -> Key -> FilePath -> Annex (Bool)
rsynchelper r sending key file = do
showProgress -- make way for progress bar
p <- rsyncParams r sending key file
res <- liftIO $ boolSystem "rsync" p
if res
then return res
else do
showLongNote "rsync failed -- run git annex again to resume file transfer"
return res
{- Generates rsync parameters that ssh to the remote and asks it
- to either receive or send the key's content. -}
2011-02-28 20:25:31 +00:00
rsyncParams :: Git.Repo -> Bool -> Key -> FilePath -> Annex [CommandParam]
rsyncParams r sending key file = do
Just (shellcmd, shellparams) <- git_annex_shell r
(if sending then "sendkey" else "recvkey")
[ Param $ show key
-- Command is terminated with "--", because
-- rsync will tack on its own options afterwards,
-- and they need to be ignored.
, Param "--"
]
-- Convert the ssh command into rsync command line.
let eparam = rsyncShell (Param shellcmd:shellparams)
2011-03-05 19:31:46 +00:00
o <- Annex.repoConfig r "rsync-options" ""
let base = options ++ map Param (words o) ++ eparam
if sending
then return $ base ++ [dummy, File file]
else return $ base ++ [File file, dummy]
where
-- inplace makes rsync resume partial files
options = [Params "-p --progress --inplace"]
-- the rsync shell parameter controls where rsync
-- goes, so the source/dest parameter can be a dummy value,
-- that just enables remote rsync mode.
dummy = Param ":"
{- Uses a supplied function to run a git-annex-shell command on a remote.
-
- Or, if the remote does not support running remote commands, returns
- a specified error value. -}
2010-12-31 19:46:33 +00:00
onRemote
:: Git.Repo
2011-02-28 20:25:31 +00:00
-> (FilePath -> [CommandParam] -> IO a, a)
2010-12-31 19:46:33 +00:00
-> String
2011-02-28 20:25:31 +00:00
-> [CommandParam]
2010-12-31 19:46:33 +00:00
-> Annex a
onRemote r (with, errorval) command params = do
s <- git_annex_shell r command params
case s of
Just (c, ps) -> liftIO $ with c ps
Nothing -> return errorval
{- Generates parameters to run a git-annex-shell command on a remote. -}
2011-02-28 20:25:31 +00:00
git_annex_shell :: Git.Repo -> String -> [CommandParam] -> Annex (Maybe (FilePath, [CommandParam]))
git_annex_shell r command params
| not $ Git.repoIsUrl r = return $ Just (shellcmd, shellopts)
2010-12-31 19:46:33 +00:00
| Git.repoIsSsh r = do
sshparams <- sshToRepo r [Param sshcmd]
return $ Just ("ssh", sshparams)
| otherwise = return Nothing
where
dir = Git.workTree r
2010-12-31 19:46:33 +00:00
shellcmd = "git-annex-shell"
shellopts = (Param command):(File dir):params
sshcmd = shellcmd ++ " " ++
2011-02-28 20:25:31 +00:00
unwords (map shellEscape $ toCommand shellopts)