git-annex/Remote/Git.hs

421 lines
13 KiB
Haskell
Raw Normal View History

{- Standard git remotes.
-
- Copyright 2011-2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Remote.Git (
remote,
configRead,
repoAvail,
) where
2011-03-29 17:49:54 +00:00
import qualified Data.Map as M
import Control.Exception.Extensible
2011-10-05 20:02:51 +00:00
import Common.Annex
import Utility.CopyFile
2012-09-19 18:28:32 +00:00
import Utility.Rsync
2012-01-10 19:29:10 +00:00
import Remote.Helper.Ssh
import Types.Remote
import qualified Git
import qualified Git.Config
import qualified Git.Construct
import qualified Annex
import Logs.Presence
import Logs.Transfer
import Annex.UUID
import Annex.Exception
2011-10-04 04:40:47 +00:00
import qualified Annex.Content
2011-12-12 21:38:46 +00:00
import qualified Annex.BranchState
import qualified Annex.Branch
2011-08-20 20:11:42 +00:00
import qualified Utility.Url as Url
2011-10-16 04:31:25 +00:00
import Utility.TempFile
import Config
import Init
import Types.Key
import qualified Fields
import Control.Concurrent
import System.Process (std_in, std_err)
2011-12-31 08:11:39 +00:00
remote :: RemoteType
2011-03-29 18:55:59 +00:00
remote = RemoteType {
typename = "git",
2011-03-29 21:57:20 +00:00
enumerate = list,
generate = gen,
2011-03-29 18:55:59 +00:00
setup = error "not supported"
}
2011-03-29 03:51:07 +00:00
2011-03-29 21:57:20 +00:00
list :: Annex [Git.Repo]
list = do
2011-12-14 19:30:14 +00:00
c <- fromRepo Git.config
rs <- mapM (tweakurl c) =<< fromRepo Git.remotes
mapM configRead rs
where
annexurl n = "remote." ++ n ++ ".annexurl"
tweakurl c r = do
2011-12-14 19:30:14 +00:00
let n = fromJust $ Git.remoteName r
case M.lookup (annexurl n) c of
Nothing -> return r
2011-12-14 19:30:14 +00:00
Just url -> inRepo $ \g ->
Git.Construct.remoteNamed n $
Git.Construct.fromRemoteLocation url g
{- It's assumed to be cheap to read the config of non-URL remotes, so this is
- done each time git-annex is run in a way that uses remotes.
-
- Conversely, the config of an URL remote is only read when there is no
- cached UUID value. -}
configRead :: Git.Repo -> Annex Git.Repo
configRead r = do
notignored <- repoNotIgnored r
u <- getRepoUUID r
case (repoCheap r, notignored, u) of
(_, False, _) -> return r
(True, _, _) -> tryGitConfigRead r
(False, _, NoUUID) -> tryGitConfigRead r
_ -> return r
repoCheap :: Git.Repo -> Bool
repoCheap = not . Git.repoIsUrl
2011-03-30 19:15:46 +00:00
gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex Remote
gen r u _ = new <$> remoteCost r defcst
where
defcst = if repoCheap r then cheapRemoteCost else expensiveRemoteCost
new cst = Remote
{ uuid = u
, cost = cst
, name = Git.repoDescribe r
, storeKey = copyToRemote r
, retrieveKeyFile = copyFromRemote r
, retrieveKeyFileCheap = copyFromRemoteCheap r
, removeKey = dropKey r
, hasKey = inAnnex r
, hasKeyCheap = repoCheap r
, whereisKey = Nothing
, config = Nothing
2012-08-26 18:26:43 +00:00
, localpath = if Git.repoIsLocal r || Git.repoIsLocalUnknown r
then Just $ Git.repoPath r
else Nothing
, repo = r
2012-08-26 19:53:31 +00:00
, readonly = Git.repoIsHttp r
, remotetype = remote
}
{- Checks relatively inexpensively if a repository is available for use. -}
repoAvail :: Git.Repo -> Annex Bool
repoAvail r
| Git.repoIsHttp r = return True
| Git.repoIsUrl r = return True
| Git.repoIsLocalUnknown r = return False
| otherwise = liftIO $ catchBoolIO $ onLocal r $ return True
{- Avoids performing an action on a local repository that's not usable.
- Does not check that the repository is still available on disk. -}
guardUsable :: Git.Repo -> a -> Annex a -> Annex a
guardUsable r onerr a
| Git.repoIsLocalUnknown r = return onerr
| otherwise = a
{- Tries to read the config for a specified remote, updates state, and
- returns the updated repo. -}
tryGitConfigRead :: Git.Repo -> Annex Git.Repo
tryGitConfigRead r
2011-12-14 19:30:14 +00:00
| not $ M.null $ Git.config r = return r -- already read
| Git.repoIsSsh r = store $ onRemote r (pipedconfig, r) "configlist" [] []
| Git.repoIsHttp r = do
headers <- getHttpHeaders
store $ safely $ geturlconfig headers
| Git.repoIsUrl r = return r
| otherwise = store $ safely $ onLocal r $ do
ensureInitialized
Annex.getState Annex.repo
where
-- Reading config can fail due to IO error or
-- for other reasons; catch all possible exceptions.
safely a = either (const $ return r) return
=<< liftIO (try a :: IO (Either SomeException Git.Repo))
pipedconfig cmd params = safely $
withHandle StdoutHandle createProcessSuccess p $
Git.Config.hRead r
where
p = proc cmd $ toCommand params
geturlconfig headers = do
s <- Url.get (Git.repoLocation r ++ "/config") headers
2011-09-21 03:24:48 +00:00
withTempFile "git-annex.tmp" $ \tmpfile h -> do
hPutStr h s
hClose h
pipedconfig "git" [Param "config", Param "--null", Param "--list", Param "--file", File tmpfile]
2012-01-02 18:54:23 +00:00
store = observe $ \r' -> do
g <- gitRepo
let l = Git.remotes g
2011-12-14 19:30:14 +00:00
let g' = g { Git.remotes = exchange l r' }
Annex.changeState $ \s -> s { Annex.repo = g' }
exchange [] _ = []
2012-03-16 00:39:25 +00:00
exchange (old:ls) new
| Git.remoteName old == Git.remoteName new =
new : exchange ls new
| otherwise =
old : exchange ls new
{- Checks if a given remote has the content for a key inAnnex.
- If the remote cannot be accessed, or if it cannot determine
- whether it has the content, returns a Left error message.
-}
inAnnex :: Git.Repo -> Key -> Annex (Either String Bool)
inAnnex r key
| Git.repoIsHttp r = checkhttp =<< getHttpHeaders
| Git.repoIsUrl r = checkremote
| otherwise = checklocal
where
checkhttp headers = liftIO $ go undefined $ keyUrls r key
where
go e [] = return $ Left e
go _ (u:us) = do
res <- catchMsgIO $
Url.check u headers (keySize key)
case res of
Left e -> go e us
v -> return v
checkremote = do
showAction $ "checking " ++ Git.repoDescribe r
onRemote r (check, unknown) "inannex" [Param (key2file key)] []
where
check c p = dispatch <$> safeSystem c p
dispatch ExitSuccess = Right True
dispatch (ExitFailure 1) = Right False
dispatch _ = unknown
checklocal = guardUsable r unknown $ dispatch <$> check
where
check = liftIO $ catchMsgIO $ onLocal r $
Annex.Content.inAnnexSafe key
dispatch (Left e) = Left e
dispatch (Right (Just b)) = Right b
dispatch (Right Nothing) = unknown
unknown = Left $ "unable to check " ++ Git.repoDescribe r
{- Runs an action on a local repository inexpensively, by making an annex
- monad using that repository. -}
onLocal :: Git.Repo -> Annex a -> IO a
onLocal r a = do
s <- Annex.new r
Annex.eval s $ do
-- No need to update the branch; its data is not used
-- for anything onLocal is used to do.
2011-12-12 21:38:46 +00:00
Annex.BranchState.disableUpdate
2012-10-04 22:57:53 +00:00
a
keyUrls :: Git.Repo -> Key -> [String]
keyUrls r key = map tourl (annexLocations key)
where
tourl l = Git.repoLocation r ++ "/" ++ l
dropKey :: Git.Repo -> Key -> Annex Bool
2011-08-17 01:20:14 +00:00
dropKey r key
| not $ Git.repoIsUrl r =
guardUsable r False $ commitOnCleanup r $ liftIO $ onLocal r $ do
ensureInitialized
whenM (Annex.Content.inAnnex key) $ do
Annex.Content.lockContent key $
Annex.Content.removeAnnex key
Annex.Content.logStatus key InfoMissing
Annex.Content.saveState True
return True
2011-08-17 01:20:14 +00:00
| Git.repoIsHttp r = error "dropping from http repo not supported"
| otherwise = commitOnCleanup r $ onRemote r (boolSystem, False) "dropkey"
[ Params "--quiet --force"
, Param $ key2file key
]
[]
{- Tries to copy a key's content from a remote's annex to a file. -}
copyFromRemote :: Git.Repo -> Key -> AssociatedFile -> FilePath -> Annex Bool
copyFromRemote r key file dest
| not $ Git.repoIsUrl r = guardUsable r False $ do
params <- rsyncParams r
u <- getUUID
-- run copy from perspective of remote
liftIO $ onLocal r $ do
ensureInitialized
loc <- inRepo $ gitAnnexLocation key
upload u key file noRetry $
rsyncOrCopyFile params loc dest
| Git.repoIsSsh r = feedprogressback $ \feeder ->
rsyncHelper (Just feeder)
=<< rsyncParamsRemote r True key dest file
| Git.repoIsHttp r = Annex.Content.downloadUrl (keyUrls r key) dest
| otherwise = error "copying from non-ssh, non-http repo not supported"
where
{- Feed local rsync's progress info back to the remote,
- by forking a feeder thread that runs
- git-annex-shell transferinfo at the same time
- git-annex-shell sendkey is running.
-
- Note that it actually waits for rsync to indicate
- progress before starting transferinfo, in order
- to ensure ssh connection caching works and reuses
- the connection set up for the sendkey.
-
- Also note that older git-annex-shell does not support
- transferinfo, so stderr is dropped and failure ignored.
-}
feedprogressback a = do
u <- getUUID
let fields = (Fields.remoteUUID, fromUUID u)
: maybe [] (\f -> [(Fields.associatedFile, f)]) file
Just (cmd, params) <- git_annex_shell r "transferinfo"
[Param $ key2file key] fields
v <- liftIO $ newEmptySampleVar
tid <- liftIO $ forkIO $ void $ tryIO $ do
bytes <- readSampleVar v
p <- createProcess $
(proc cmd (toCommand params))
{ std_in = CreatePipe
, std_err = CreatePipe
}
hClose $ stderrHandle p
let h = stdinHandle p
let send b = do
hPutStrLn h $ show b
hFlush h
send bytes
forever $
send =<< readSampleVar v
let feeder = writeSampleVar v
bracketIO noop (const $ tryIO $ killThread tid) (a feeder)
copyFromRemoteCheap :: Git.Repo -> Key -> FilePath -> Annex Bool
copyFromRemoteCheap r key file
| not $ Git.repoIsUrl r = guardUsable r False $ do
loc <- liftIO $ gitAnnexLocation key r
liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True
2012-03-16 00:39:25 +00:00
| Git.repoIsSsh r =
ifM (Annex.Content.preseedTmp key file)
( copyFromRemote r key Nothing file
2012-03-16 00:39:25 +00:00
, return False
)
| otherwise = return False
{- Tries to copy a key's content to a remote's annex. -}
2012-09-21 18:50:14 +00:00
copyToRemote :: Git.Repo -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
copyToRemote r key file p
| not $ Git.repoIsUrl r = guardUsable r False $ commitOnCleanup r $ do
keysrc <- inRepo $ gitAnnexLocation key
params <- rsyncParams r
u <- getUUID
-- run copy from perspective of remote
liftIO $ onLocal r $ ifM (Annex.Content.inAnnex key)
( return False
, do
ensureInitialized
download u key file noRetry $
Annex.Content.saveState True `after`
Annex.Content.getViaTmp key
(\d -> rsyncOrCopyFile params keysrc d p)
)
| Git.repoIsSsh r = commitOnCleanup r $ do
keysrc <- inRepo $ gitAnnexLocation key
rsyncHelper (Just p) =<< rsyncParamsRemote r False key keysrc file
| otherwise = error "copying to non-ssh repo not supported"
2012-09-21 18:50:14 +00:00
rsyncHelper :: Maybe MeterUpdate -> [CommandParam] -> Annex Bool
rsyncHelper callback params = do
showOutput -- make way for progress bar
ifM (liftIO $ (maybe rsync rsyncProgress callback) params)
2012-03-16 00:39:25 +00:00
( return True
, do
showLongNote "rsync failed -- run git annex again to resume file transfer"
2012-03-16 00:39:25 +00:00
return False
)
{- Copys a file with rsync unless both locations are on the same
- filesystem. Then cp could be faster. -}
2012-09-21 18:50:14 +00:00
rsyncOrCopyFile :: [CommandParam] -> FilePath -> FilePath -> MeterUpdate -> Annex Bool
rsyncOrCopyFile rsyncparams src dest p =
2012-09-22 00:24:08 +00:00
ifM (sameDeviceIds src dest) (docopy, dorsync)
2012-03-16 00:39:25 +00:00
where
sameDeviceIds a b = (==) <$> (getDeviceId a) <*> (getDeviceId b)
getDeviceId f = deviceID <$> liftIO (getFileStatus $ parentDir f)
dorsync = rsyncHelper (Just p) $
rsyncparams ++ [Param src, Param dest]
docopy = liftIO $ bracket
(forkIO $ watchfilesize 0)
(void . tryIO . killThread)
(const $ copyFileExternal src dest)
watchfilesize oldsz = do
threadDelay 500000 -- 0.5 seconds
v <- catchMaybeIO $
fromIntegral . fileSize
<$> getFileStatus dest
case v of
Just sz
| sz /= oldsz -> do
p sz
watchfilesize sz
_ -> watchfilesize oldsz
{- Generates rsync parameters that ssh to the remote and asks it
- to either receive or send the key's content. -}
rsyncParamsRemote :: Git.Repo -> Bool -> Key -> FilePath -> AssociatedFile -> Annex [CommandParam]
rsyncParamsRemote r sending key file afile = do
u <- getUUID
let fields = (Fields.remoteUUID, fromUUID u)
: maybe [] (\f -> [(Fields.associatedFile, f)]) afile
Just (shellcmd, shellparams) <- git_annex_shell r
(if sending then "sendkey" else "recvkey")
[ Param $ key2file key ]
fields
-- Convert the ssh command into rsync command line.
let eparam = rsyncShell (Param shellcmd:shellparams)
o <- rsyncParams r
if sending
then return $ o ++ rsyncopts eparam dummy (File file)
else return $ o ++ rsyncopts eparam (File file) dummy
where
rsyncopts ps source dest
| end ps == [dashdash] = ps ++ [source, dest]
| otherwise = ps ++ [dashdash, source, dest]
dashdash = Param "--"
-- The rsync shell parameter controls where rsync
-- goes, so the source/dest parameter can be a dummy value,
-- that just enables remote rsync mode.
-- For maximum compatability with some patched rsyncs,
-- the dummy value needs to still contain a hostname,
-- even though this hostname will never be used.
dummy = Param "dummy:"
rsyncParams :: Git.Repo -> Annex [CommandParam]
rsyncParams r = do
o <- getRemoteConfig r "rsync-options" ""
return $ options ++ map Param (words o)
where
-- --inplace to resume partial files
options = [Params "-p --progress --inplace"]
commitOnCleanup :: Git.Repo -> Annex a -> Annex a
commitOnCleanup r a = go `after` a
where
go = Annex.addCleanup (Git.repoLocation r) cleanup
cleanup
| not $ Git.repoIsUrl r = liftIO $ onLocal r $
doQuietSideAction $
Annex.Branch.commit "update"
| otherwise = void $ do
Just (shellcmd, shellparams) <-
git_annex_shell r "commit" [] []
-- Throw away stderr, since the remote may not
-- have a new enough git-annex shell to
-- support committing.
let cmd = shellcmd ++ " "
++ unwords (map shellEscape $ toCommand shellparams)
++ ">/dev/null 2>/dev/null"
liftIO $ boolSystem "sh" [Param "-c", Param cmd]