git-annex/Remote/WebDAV.hs

528 lines
17 KiB
Haskell
Raw Normal View History

{- WebDAV remotes.
-
- Copyright 2012-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE ScopedTypeVariables #-}
module Remote.WebDAV (remote, davCreds, configUrl) where
import Network.Protocol.HTTP.DAV
import qualified Data.Map as M
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.UTF8 as B8
import qualified Data.ByteString.Lazy.UTF8 as L8
import Network.HTTP.Client (HttpException(..), RequestBody)
import qualified Network.HTTP.Client as HTTP
import Network.HTTP.Client (HttpExceptionContent(..), responseStatus)
import Network.HTTP.Types
import System.IO.Error
import Control.Monad.Catch
import Control.Monad.IO.Class (MonadIO)
import System.Log.Logger (debugM)
import Control.Concurrent.STM hiding (check)
import Annex.Common
import Types.Remote
2017-09-15 20:34:45 +00:00
import Types.Export
import qualified Git
import qualified Annex
import Config
import Config.Cost
import Annex.SpecialRemote.Config
import Remote.Helper.Special
2015-08-17 14:42:14 +00:00
import Remote.Helper.Messages
import Remote.Helper.Http
2019-02-20 19:55:01 +00:00
import Remote.Helper.ExportImport
import qualified Remote.Helper.Chunked.Legacy as Legacy
import Creds
import Utility.Metered
import Utility.Url (URLString, matchStatusCodeException, matchHttpExceptionContent)
import Annex.UUID
import Remote.WebDAV.DavLocation
import Types.ProposedAccepted
{- The RemoteType describing WebDAV special remotes: how they are
 - enumerated, generated, configured and set up. Exporting to a WebDAV
 - remote is supported; importing from one is not. -}
remote :: RemoteType
remote = specialRemoteType webdavType
  where
    webdavType = RemoteType
        { typename = "webdav"
        , enumerate = const (findSpecialRemotes "webdav")
        , generate = gen
        , configParser = mkRemoteConfigParser fieldParsers
        , setup = webdavSetup
        , exportSupported = exportIsSupported
        , importSupported = importUnsupported
        , thirdPartyPopulated = False
        }
    -- Config fields specific to this remote type.
    fieldParsers =
        [ optionalStringParser urlField
            (FieldDesc "(required) url to the WebDAV directory")
        , optionalStringParser davcredsField HiddenField
        ]
{- Remote config field holding the url of the WebDAV directory. -}
urlField :: RemoteConfigField
urlField = Accepted "url"
{- Remote config field used as the credPairRemoteField of davCreds.
 - It is parsed as a HiddenField in the remote's config parser. -}
davcredsField :: RemoteConfigField
davcredsField = Accepted "davcreds"
{- Generates the Remote for a WebDAV special remote.
 -
 - Parses the remote's config, looks up its cost, and prepares a
 - DavHandleVar, then wraps the resulting Remote with specialRemote
 - using the store/retrieve/remove/checkKey actions defined in this
 - module. -}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
gen r u rc gc rs = do
    c <- parsedRemoteConfig remote rc
    cst <- remoteCost gc expensiveRemoteCost
    hdl <- mkDavHandleVar c gc u
    return (new c cst hdl)
  where
    new c cst hdl = Just $ specialRemote c
        (store hdl chunkconfig)
        (retrieve hdl chunkconfig)
        (remove hdl)
        (checkKey hdl this chunkconfig)
        this
      where
        this = Remote
            { uuid = u
            , cost = cst
            , name = Git.repoDescribe r
            , storeKey = storeKeyDummy
            , retrieveKeyFile = retrieveKeyFileDummy
            , retrieveKeyFileCheap = Nothing
            -- HttpManagerRestricted is used here, so this is
            -- secure.
            , retrievalSecurityPolicy = RetrievalAllKeysSecure
            , removeKey = removeKeyDummy
            , lockContent = Nothing
            , checkPresent = checkPresentDummy
            , checkPresentCheap = False
            , exportActions = ExportActions
                { storeExport = storeExportDav hdl
                , retrieveExport = retrieveExportDav hdl
                , checkPresentExport = checkPresentExportDav hdl this
                , removeExport = removeExportDav hdl
                , versionedExport = False
                , removeExportDirectory = Just $
                    removeExportDirectoryDav hdl
                , renameExport = renameExportDav hdl
                }
            , importActions = importUnsupported
            , whereisKey = Nothing
            , remoteFsck = Nothing
            , repairRepo = Nothing
            , config = c
            , getRepo = return r
            , gitconfig = gc
            , localpath = Nothing
            , readonly = False
            , appendonly = False
            , untrustworthy = False
            , availability = GloballyAvailable
            , remotetype = remote
            -- The unavailable variant is the same remote, regenerated
            -- with its url replaced by a placeholder.
            , mkUnavailable = gen r u (M.insert urlField (Proposed "http://!dne!/") rc) gc rs
            , getInfo = includeCredsInfo c (davCreds u) $
                [("url", fromMaybe "unknown" $ getRemoteConfigValue urlField c)]
            , claimUrl = Nothing
            , checkUrl = Nothing
            , remoteStateHandle = rs
            }
        chunkconfig = getChunkConfig c
{- Sets up a WebDAV special remote.
 -
 - A UUID is generated unless one was provided. The url setting is
 - required. Encryption is set up, the credentials (those passed in, or
 - else looked up with getCreds) are tested against the server with
 - testDav, and finally the remote is recorded in git config and the
 - credentials are stored. Returns the updated config and the UUID. -}
webdavSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
webdavSetup _ mu mcreds c gc = do
    u <- case mu of
        Just existing -> return existing
        Nothing -> liftIO genUUID
    url <- case M.lookup urlField c of
        Just v -> return (fromProposedAccepted v)
        Nothing -> giveup "Specify url="
    (c', encsetup) <- encryptionSetup c gc
    pc <- configParser remote c'
        >>= either giveup return . parseRemoteConfig c'
    creds <- case mcreds of
        Just cp -> return (Just cp)
        Nothing -> getCreds pc gc u
    testDav url creds
    gitConfigSpecialRemote u c' [("webdav", "true")]
    c'' <- setRemoteCredPair encsetup pc gc (davCreds u) creds
    return (c'', u)
{- Storer for key content.
 -
 - With legacy chunking, the file is read and handed to
 - storeLegacyChunked. Otherwise the request body is uploaded to the
 - key's temp location and then moved into place by storeHelper. -}
store :: DavHandleVar -> ChunkConfig -> Storer
store hv chunkconfig = case chunkconfig of
    LegacyChunks chunksize -> fileStorer (legacy chunksize)
    _ -> httpStorer unchunked
  where
    legacy chunksize k f p = withDavHandle hv $ \dav -> do
        runner <- Annex.makeRunner
        liftIO $ withMeteredFile f p $
            storeLegacyChunked runner chunksize k dav
    unchunked k reqbody = withDavHandle hv $ \dav ->
        liftIO $ goDAV dav $
            storeHelper dav (keyTmpLocation k) (keyLocation k) reqbody
{- Uploads a request body to the tmp location (creating its parent
 - collection first when it has one), then uses finalizeStore to move
 - it to dest. -}
storeHelper :: DavHandle -> DavLocation -> DavLocation -> RequestBody -> DAVT IO ()
storeHelper dav tmp dest reqbody = do
    case locationParent tmp of
        Nothing -> noop
        Just parent -> void (mkColRecursive parent)
    debugDav ("putContent " ++ tmp)
    inLocation tmp (putContentM' (contentType, reqbody))
    finalizeStore dav tmp dest
{- Moves content from the tmp location to dest.
 -
 - Anything already at dest is deleted first; failure of that delete
 - is ignored. The parent collection of dest is created when dest has
 - one. -}
finalizeStore :: DavHandle -> DavLocation -> DavLocation -> DAVT IO ()
finalizeStore dav tmp dest = do
    debugDav ("delContent " ++ dest)
    inLocation dest (void (safely delContentM))
    case locationParent dest of
        Nothing -> noop
        Just parent -> void (mkColRecursive parent)
    moveDAV (baseURL dav) tmp dest
{- Retriever for key content, downloading it to a file. Legacy chunked
 - content is handled by retrieveLegacyChunked; anything else is
 - fetched from the key's location. -}
retrieve :: DavHandleVar -> ChunkConfig -> Retriever
retrieve hv cc = fileRetriever $ \d k p -> withDavHandle hv (fetch d k p)
  where
    fetch d k p dav = case cc of
        LegacyChunks _ -> retrieveLegacyChunked d k p dav
        _ -> liftIO (goDAV dav (retrieveHelper (keyLocation k) d p))
{- Downloads the content at a location to a file, passing the
 - MeterUpdate on to httpBodyRetriever. -}
retrieveHelper :: DavLocation -> FilePath -> MeterUpdate -> DAVT IO ()
retrieveHelper loc d p = do
    debugDav ("retrieve " ++ loc)
    inLocation loc (withContentM (httpBodyRetriever d p))
{- Remover for key content. -}
remove :: DavHandleVar -> Remover
remove hv k = withDavHandle hv (liftIO . deletekeydir)
  where
    -- Delete the key's whole directory, including any
    -- legacy chunked files, etc, in a single action.
    deletekeydir dav = goDAV dav (removeHelper (keyDir k))
{- Deletes a location.
 -
 - When the delete fails, checks whether the location exists; if it
 - is known not to exist the removal counts as successful. Otherwise
 - (it still exists, or existence could not be determined) gives up. -}
removeHelper :: DavLocation -> DAVT IO ()
removeHelper d = do
    debugDav ("delContent " ++ d)
    deleted <- safely (inLocation d delContentM)
    maybe verifygone (const (return ())) deleted
  where
    verifygone = existsDAV d >>= \case
        Right False -> return ()
        _ -> giveup "failed to remove content from remote"
{- CheckPresent for key content. Gives up with the error message when
 - existence of the key's location cannot be determined. -}
checkKey :: DavHandleVar -> Remote -> ChunkConfig -> CheckPresent
checkKey hv r chunkconfig k = withDavHandle hv $ \dav ->
    showChecking r >> present dav
  where
    present dav = case chunkconfig of
        LegacyChunks _ -> checkKeyLegacyChunked dav k
        _ -> either giveup return
            =<< liftIO (goDAV dav (existsDAV (keyLocation k)))
2020-05-15 16:17:15 +00:00
{- Stores a file to an export location, uploading via the key's temp
 - location. Gives up when the export location cannot be converted to
 - a DavLocation. -}
storeExportDav :: DavHandleVar -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex ()
storeExportDav hdl f k loc p = either giveup upload (exportLocation loc)
  where
    upload dest = withDavHandle hdl $ \h -> runExport h $ \dav -> do
        body <- liftIO (httpBodyStorer f p)
        storeHelper dav (keyTmpLocation k) dest body
{- Retrieves the content of an export location to a file. Gives up
 - when the export location cannot be converted to a DavLocation. -}
retrieveExportDav :: DavHandleVar -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex ()
retrieveExportDav hdl _k loc d p = either giveup download (exportLocation loc)
  where
    download src = withDavHandle hdl $ \h ->
        runExport h (const (retrieveHelper src d p))
{- Checks if an export location exists on the remote. Gives up when
 - the location cannot be converted to a DavLocation, or when its
 - existence cannot be determined. -}
checkPresentExportDav :: DavHandleVar -> Remote -> Key -> ExportLocation -> Annex Bool
checkPresentExportDav hdl _ _k loc = either giveup check (exportLocation loc)
  where
    check p = withDavHandle hdl $ \h -> liftIO $
        goDAV h (existsDAV p) >>= either giveup return
{- Removes the content of an export location from the remote. -}
removeExportDav :: DavHandleVar-> Key -> ExportLocation -> Annex ()
removeExportDav hdl _k loc = either illegal rm (exportLocation loc)
  where
    rm p = withDavHandle hdl $ \h ->
        runExport h (const (removeHelper p))
    -- When the exportLocation is not legal for webdav,
    -- the content is certainly not stored there, so it's ok for
    -- removal to succeed. This allows recovery after failure to store
    -- content there, as the user can rename the problem file and
    -- this will be called to make sure it's gone.
    illegal _err = return ()
{- Deletes an exported directory from the remote, with a single
 - delete request on the directory's location. -}
removeExportDirectoryDav :: DavHandleVar -> ExportDirectory -> Annex ()
removeExportDirectoryDav hdl dir = withDavHandle hdl $ \h ->
    runExport h (const deldir)
  where
    d = fromRawFilePath (fromExportDirectory dir)
    deldir = do
        debugDav ("delContent " ++ d)
        inLocation d delContentM
{- Renames an exported file on the remote.
 -
 - Returns Nothing, without doing anything, when the remote is
 - box.com. Gives up when either location cannot be converted to a
 - DavLocation; the source location is checked first. -}
renameExportDav :: DavHandleVar -> Key -> ExportLocation -> ExportLocation -> Annex (Maybe ())
renameExportDav hdl _k src dest = either giveup id $ do
    srcl <- exportLocation src
    destl <- exportLocation dest
    return (withDavHandle hdl (rename srcl destl))
  where
    rename srcl destl h
        -- box.com's DAV endpoint has buggy handling of renames,
        -- so avoid renaming when using it.
        | boxComUrl `isPrefixOf` baseURL h = return Nothing
        | otherwise = runExport h $ \dav -> do
            case locationParent destl of
                Nothing -> noop
                Just parent -> void (mkColRecursive parent)
            moveDAV (baseURL dav) srcl destl
            return (Just ())
2020-05-15 16:17:15 +00:00
{- Runs a DAV action, which is passed the handle, using goDAV, and
 - lifts the result into Annex. -}
runExport :: DavHandle -> (DavHandle -> DAVT IO a) -> Annex a
runExport h a = liftIO $ goDAV h $ a h
{- Gets the url of the WebDAV directory from the remote's config,
 - if it is set. -}
configUrl :: ParsedRemoteConfig -> Maybe URLString
configUrl c = fmap fixup (getRemoteConfigValue urlField c)
  where
    -- box.com DAV url changed
    fixup url = replace "https://www.box.com/dav/" boxComUrl url
{- The url of box.com's DAV endpoint. configUrl rewrites the older
 - www.box.com url to this one, and renameExportDav avoids renames on
 - remotes whose base url starts with it. -}
boxComUrl :: URLString
boxComUrl = "https://dav.box.com/dav/"
{- User name and password, in the ByteString form that is stored in
 - a DavHandle and passed to prepDAV. -}
type DavUser = B8.ByteString
type DavPass = B8.ByteString
{- The base url that a DavHandle was created with. -}
baseURL :: DavHandle -> URLString
baseURL (DavHandle _ _ _ u) = u
{- Converts a user name String to a DavUser, with B8.fromString. -}
toDavUser :: String -> DavUser
toDavUser = B8.fromString
{- Converts a password String to a DavPass, with B8.fromString. -}
toDavPass :: String -> DavPass
toDavPass = B8.fromString
{- Test if a WebDAV store is usable, by writing to a test file, and then
 - deleting the file.
 -
 - Also ensures that the path of the url exists, trying to create it if not.
 -
 - Throws an error if store is not usable, or if no credentials are
 - provided.
 -}
testDav :: URLString -> Maybe CredPair -> Annex ()
testDav _ Nothing = giveup "Need to configure webdav username and password."
testDav url (Just (u, p)) = do
    showAction "testing WebDAV server"
    -- evalDAVT reports some failures as a Left value, rather than
    -- by throwing an exception. choke converts those to exceptions,
    -- the same as goDAV does, so they fail the test instead of being
    -- silently discarded.
    test $ choke $ evalDAVT url $ do
        prepDAV user pass
        makeParentDirs
        inLocation (tmpLocation "test") $ do
            putContentM (Nothing, L8.fromString "test")
            delContentM
  where
    -- Any non-async exception is rethrown with a prefix that
    -- identifies the test as the source of the failure.
    test a = liftIO $
        either (\e -> throwIO $ "WebDAV test failed: " ++ show e)
            (const noop)
            =<< tryNonAsync a
    user = toDavUser u
    pass = toDavPass p
{- Tries to make all the parent directories in the WebDAV url's path,
 - right down to the root, and then the current location itself.
 - Parents are created before their children.
 -
 - Ignores any failures, which can occur for reasons including the WebDAV
 - server only serving up WebDAV in a subdirectory. -}
makeParentDirs :: DAVT IO ()
makeParentDirs = do
    here <- getDAVLocation
    maybe noop ascend (locationParent here)
    void (safely mkCol)
  where
    ascend parent = void $ safely $
        inDAVLocation (const parent) makeParentDirs
{- Checks if the directory exists. If not, tries to create it, and
 - when that fails, tries to create its parent directories, all the
 - way down to the root, and then tries once more to create it.
 -
 - Returns True when the directory exists or was created. -}
mkColRecursive :: DavLocation -> DAVT IO Bool
mkColRecursive d = existsDAV d >>= \case
    Right True -> return True
    _ -> do
        debugDav ("mkCol " ++ d)
        created <- inLocation d mkCol
        if created
            then return True
            else do
                maybe makeParentDirs (void . mkColRecursive)
                    (locationParent d)
                inLocation d mkCol
{- Looks up the credentials for the WebDAV remote with the given UUID,
 - using getRemoteCredPairFor with the davCreds storage. -}
getCreds :: ParsedRemoteConfig -> RemoteGitConfig -> UUID -> Annex (Maybe CredPair)
getCreds c gc = getRemoteCredPairFor "webdav" c gc . davCreds
{- Describes where the credentials of the WebDAV remote with the given
 - UUID are kept: a creds file named after the UUID, a pair of
 - environment variables, and the davcreds remote config field. -}
davCreds :: UUID -> CredPairStorage
davCreds u = CredPairStorage
    { credPairFile = fromUUID u
    , credPairEnvironment = ("WEBDAV_USERNAME", "WEBDAV_PASSWORD")
    , credPairRemoteField = davcredsField
    }
{- Content-Type to use for files uploaded to WebDAV. -}
contentType :: Maybe B8.ByteString
contentType = Just (B8.fromString "application/octet-stream")
{- Throws an IOError of the user error type, built from the message
 - with mkIOError. -}
throwIO :: String -> IO a
throwIO = ioError . usererror
  where
    usererror msg = mkIOError userErrorType msg Nothing Nothing
{- Moves content from the src location to the dest location. The
 - destination is passed to the server as a url, constructed from
 - the base url. -}
moveDAV :: URLString -> DavLocation -> DavLocation -> DAVT IO ()
moveDAV baseurl src dest = do
    let newurl = locationUrl baseurl dest
    debugDav (unwords ["moveContent", src, newurl])
    inLocation src (moveContentM (B8.fromString newurl))
{- Checks if a location exists, by getting its properties.
 -
 - A 404 response, or too many redirects, is treated as the location
 - not existing. Any other non-async exception results in a Left
 - containing the shown exception. -}
existsDAV :: DavLocation -> DAVT IO (Either String Bool)
existsDAV l = do
    debugDav ("getProps " ++ l)
    catchNonAsync (inLocation l probe) (return . Left . show)
  where
    probe = do
        -- Some DAV services only support depth of 1, and
        -- more depth is certainly not needed to check if a
        -- location exists.
        setDepth (Just Depth1)
        catchJust missing
            (getPropsM >> return (Right True))
            (const (return (Right False)))
    missing e =
        matchStatusCodeException (== notFound404) e
            <|> matchHttpExceptionContent redirectloop e
    redirectloop = \case
        TooManyRedirects _ -> True
        _ -> False
{- Runs a DAV action, returning Nothing when it throws a non-async
 - exception. -}
safely :: DAVT IO a -> DAVT IO (Maybe a)
safely a = eitherToMaybe <$> tryNonAsync a
{- Runs an action that may return an error message, converting a
 - Left message into an exception raised with error. -}
choke :: IO (Either String a) -> IO a
choke f = f >>= either error return
{- What is needed to talk to a WebDAV server: the DAV context, the
 - user name and password, and the base url. -}
data DavHandle = DavHandle DAVContext DavUser DavPass URLString
{- Holds either an action that makes a DavHandle (Left), or the cached
 - result of having run that action (Right). The result is itself
 - either an error message or the DavHandle. -}
type DavHandleVar = TVar (Either (Annex (Either String DavHandle)) (Either String DavHandle))
{- Prepares a DavHandle for later use. Does not connect to the server or do
 - anything else expensive.
 -
 - The action stored in the var yields a Left error message when the
 - credentials or the url are not available. -}
mkDavHandleVar :: ParsedRemoteConfig -> RemoteGitConfig -> UUID -> Annex DavHandleVar
mkDavHandleVar c gc u = liftIO $ newTVarIO $ Left $ do
    mcreds <- getCreds c gc u
    case (mcreds, configUrl c) of
        (Just (user, pass), Just baseurl) -> do
            ctx <- mkDAVContext baseurl
            let h = DavHandle ctx (toDavUser user) (toDavPass pass) baseurl
            return (Right h)
        (Nothing, _) -> return $ Left "webdav credentials not available"
        -- The credentials are available, so don't blame them for
        -- the failure.
        (_, Nothing) -> return $ Left "webdav url not configured"
{- Runs an action with the DavHandle from the var.
 -
 - The first use runs the stored action to make the handle, and caches
 - its result (whether a handle or an error message) in the var.
 - Gives up with the error message when no handle could be made. -}
withDavHandle :: DavHandleVar -> (DavHandle -> Annex a) -> Annex a
withDavHandle hv a = do
    st <- liftIO (readTVarIO hv)
    hdl <- case st of
        Right cached -> return cached
        Left mkhdl -> do
            made <- mkhdl
            liftIO $ atomically $ writeTVar hv (Right made)
            return made
    either giveup a hdl
{- Runs a DAV action in the handle's context, after setting up the
 - credentials with prepDAV. Status code exceptions are prettified,
 - and a Left error result is raised as an exception by choke. -}
goDAV :: DavHandle -> DAVT IO a -> IO a
goDAV (DavHandle ctx user pass _) a =
    choke $ fst <$> runDAVContext ctx prepared
  where
    prepared = prettifyExceptions (prepDAV user pass >> a)
{- Catch StatusCodeException and trim it down to the response status,
 - the message, and the method and path of the request, eliminating a
 - lot of noise, which can include the whole request that failed.
 - The rethrown exception is no longer a StatusCodeException. -}
prettifyExceptions :: DAVT IO a -> DAVT IO a
prettifyExceptions a = catchJust (matchStatusCodeException (const True)) a rethrow
  where
    rethrow (HttpExceptionRequest req (StatusCodeException response message)) =
        giveup $ unwords
            [ "DAV failure:"
            , show (responseStatus response)
            , show message
            , "HTTP request:"
            , show (HTTP.method req)
            , show (HTTP.path req)
            ]
    rethrow other = throwM other
{- Prepares the DAV state for making requests: disables the response
 - timeout, and sets the credentials. -}
prepDAV :: DavUser -> DavPass -> DAVT IO ()
prepDAV user pass =
    -- disable default (5 second!) timeout
    setResponseTimeout Nothing
        >> setCreds user pass
2014-08-08 17:17:24 +00:00
--
-- Legacy chunking code, to be removed eventually.
--
{- Stores content using the legacy chunking scheme, by handing
 - Legacy.storeChunks the actions that upload a chunk, record a string
 - to a location, and finalize the store. -}
storeLegacyChunked :: (Annex () -> IO ()) -> ChunkSize -> Key -> DavHandle -> L.ByteString -> IO ()
storeLegacyChunked annexrunner chunksize k dav b =
    Legacy.storeChunks k tmp dest storer recorder finalizer
  where
    tmp = addTrailingPathSeparator (keyTmpLocation k)
    dest = keyLocation k
    -- Uploads a lazy ByteString to a location, creating the parent
    -- collection first when the location has one.
    upload l content = void $ goDAV dav $ do
        case locationParent l of
            Nothing -> noop
            Just parent -> void (mkColRecursive parent)
        debugDav ("putContent " ++ l)
        inLocation l (putContentM (contentType, content))
    storer locs = Legacy.storeChunked annexrunner chunksize locs upload b
    recorder l s = upload l (L8.fromString s)
    -- NOTE(review): fromJust is partial; this relies on dest' always
    -- having a parent location.
    finalizer tmp' dest' = goDAV dav $
        finalizeStore dav tmp' (fromJust (locationParent dest'))
2014-08-08 17:17:24 +00:00
{- Retrieves content stored with the legacy chunking scheme, writing
 - the chunks to the destination file with
 - Legacy.meteredWriteFileChunks. -}
retrieveLegacyChunked :: FilePath -> Key -> MeterUpdate -> DavHandle -> Annex ()
retrieveLegacyChunked d k p dav = liftIO $
    withStoredFilesLegacyChunked k dav onerr fetchall
  where
    fetchall locs = Legacy.meteredWriteFileChunks p d locs fetchchunk
    -- Downloads the content of a single chunk.
    fetchchunk l = goDAV dav $ do
        debugDav ("getContent " ++ l)
        inLocation l (snd <$> getContentM)
    onerr = error "download failed"
{- Checks if all the legacy chunks of a key are present. An error
 - message from a failed check is raised with error. -}
checkKeyLegacyChunked :: DavHandle -> CheckPresent
checkKeyLegacyChunked dav k = liftIO $
    either error id <$> withStoredFilesLegacyChunked k dav onerr check
  where
    -- Every chunk must exist; stops at the first one that is missing
    -- or whose existence cannot be determined.
    check [] = return (Right True)
    check (l:ls) = goDAV dav (existsDAV l) >>= \case
        Right True -> check ls
        v -> return v

    {- Failed to read the chunkcount file; see if it's missing,
     - or if there's a problem accessing it,
     - or perhaps this was an intermittent error. -}
    onerr f = goDAV dav (existsDAV f) >>= \case
        Right True -> return (Left ("failed to read " ++ f))
        v -> return v
{- Finds the locations of the legacy chunks of a key, and runs an
 - action on them.
 -
 - The chunk list comes from the chunkcount file when that can be
 - read. Otherwise the chunks are probed for, and when none are found,
 - the error action is run on the chunkcount file's location. -}
withStoredFilesLegacyChunked
    :: Key
    -> DavHandle
    -> (DavLocation -> IO a)
    -> ([DavLocation] -> IO a)
    -> IO a
withStoredFilesLegacyChunked k dav onerr a =
    readchunkcount
        >>= maybe probe (a . Legacy.listChunks keyloc . L8.toString)
  where
    keyloc = keyLocation k
    chunkcount = keyloc ++ Legacy.chunkCount
    readchunkcount = goDAV dav $ safely $ do
        debugDav ("getContent " ++ chunkcount)
        inLocation chunkcount (snd <$> getContentM)
    probe = do
        chunks <- Legacy.probeChunks keyloc exists
        if null chunks
            then onerr chunkcount
            else a chunks
    exists f = (== Right True) <$> goDAV dav (existsDAV f)
{- Logs a debug message under the "WebDAV" logger name. -}
debugDav :: MonadIO m => String -> DAVT m ()
debugDav = liftIO . debugM "WebDAV"