git-annex/Remote/Web.hs
Joey Hess d154e7022e
incremental verification for web special remote
Except when configuration makes curl be used. It did not seem worth
trying to tail the file when curl is downloading.

But when an interrupted download is resumed, it does not read the whole
existing file to hash it. Same reason discussed in
commit 7eb3742e4b76d1d7a487c2c53bf25cda4ee5df43; that could take a long
time with no progress being displayed. And also there's an open http
request, which needs to be consumed; taking a long time to hash the file
might cause it to time out.

Also in passing implemented it for git and external special remotes when
downloading from the web. Several others like S3 are within striking
distance now as well.

Sponsored-by: Dartmouth College's DANDI project
2021-08-18 15:02:22 -04:00

141 lines
3.9 KiB
Haskell

{- Web remote.
-
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Remote.Web (remote, getWebUrls) where
import Annex.Common
import Types.Remote
import Remote.Helper.ExportImport
import qualified Git
import qualified Git.Construct
import Annex.Content
import Annex.Verify
import Config.Cost
import Config
import Logs.Web
import Annex.UUID
import Utility.Metered
import qualified Annex.Url as Url
import Annex.YoutubeDl
import Annex.SpecialRemote.Config
remote :: RemoteType
remote = RemoteType
{ typename = "web"
, enumerate = list
, generate = gen
, configParser = mkRemoteConfigParser []
, setup = error "not supported"
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
-- There is only one web remote, and it always exists.
-- (If the web should cease to exist, remove this module and redistribute
-- a new release to the survivors by carrier pigeon.)
list :: Bool -> Annex [Git.Repo]
list _autoinit = do
r <- liftIO $ Git.Construct.remoteNamed "web" (pure Git.Construct.fromUnknown)
return [r]
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
gen r _ rc gc rs = do
c <- parsedRemoteConfig remote rc
cst <- remoteCost gc expensiveRemoteCost
return $ Just Remote
{ uuid = webUUID
, cost = cst
, name = Git.repoDescribe r
, storeKey = uploadKey
, retrieveKeyFile = downloadKey
, retrieveKeyFileCheap = Nothing
-- HttpManagerRestricted is used here, so this is
-- secure.
, retrievalSecurityPolicy = RetrievalAllKeysSecure
, removeKey = dropKey
, lockContent = Nothing
, checkPresent = checkKey
, checkPresentCheap = False
, exportActions = exportUnsupported
, importActions = importUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
, config = c
, gitconfig = gc
, localpath = Nothing
, getRepo = return r
, readonly = True
, appendonly = False
, untrustworthy = False
, availability = GloballyAvailable
, remotetype = remote
, mkUnavailable = return Nothing
, getInfo = return []
, claimUrl = Nothing -- implicitly claims all urls
, checkUrl = Nothing
, remoteStateHandle = rs
}
downloadKey :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
downloadKey key _af dest p vc = go =<< getWebUrls key
where
go [] = giveup "no known url"
go urls = getM dl urls >>= \case
Just v -> return v
Nothing -> giveup "download failed"
dl u = do
let (u', downloader) = getDownloader u
case downloader of
YoutubeDownloader ->
ifM (youtubeDlTo key u' dest p)
( return (Just UnVerified)
, return Nothing
)
_ -> do
iv <- startVerifyKeyContentIncrementally vc key
ifM (Url.withUrlOptions $ downloadUrl key p iv [u'] dest)
( finishVerifyKeyContentIncrementally iv >>= \case
(True, v) -> return (Just v)
(False, _) -> return Nothing
, return Nothing
)
uploadKey :: Key -> AssociatedFile -> MeterUpdate -> Annex ()
uploadKey _ _ _ = giveup "upload to web not supported"
dropKey :: Key -> Annex ()
dropKey k = mapM_ (setUrlMissing k) =<< getWebUrls k
checkKey :: Key -> Annex Bool
checkKey key = do
us <- getWebUrls key
if null us
then return False
else either giveup return =<< checkKey' key us
checkKey' :: Key -> [URLString] -> Annex (Either String Bool)
checkKey' key us = firsthit us (Right False) $ \u -> do
let (u', downloader) = getDownloader u
case downloader of
YoutubeDownloader -> youtubeDlCheck u'
_ -> catchMsgIO $
Url.withUrlOptions $ Url.checkBoth u' (fromKey keySize key)
where
firsthit [] miss _ = return miss
firsthit (u:rest) _ a = do
r <- a u
case r of
Right True -> return r
_ -> firsthit rest r a
getWebUrls :: Key -> Annex [URLString]
getWebUrls key = filter supported <$> getUrls key
where
supported u = snd (getDownloader u)
`elem` [WebDownloader, YoutubeDownloader]