git-annex/Remote/Web.hs

146 lines
4.1 KiB
Haskell
Raw Normal View History

{- Web remote.
-
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Remote.Web (remote, getWebUrls) where
import Annex.Common
import Types.Remote
2019-02-20 19:55:01 +00:00
import Remote.Helper.ExportImport
import qualified Git
import qualified Git.Construct
import Annex.Content
import Annex.Verify
import Config.Cost
import Config
import Logs.Web
2014-12-17 17:57:52 +00:00
import Annex.UUID
import Utility.Metered
import qualified Annex.Url as Url
import Annex.YoutubeDl
import Annex.SpecialRemote.Config
2011-12-31 08:11:39 +00:00
remote :: RemoteType
remote = RemoteType
{ typename = "web"
, enumerate = list
, generate = gen
, configParser = mkRemoteConfigParser []
, setup = error "not supported"
, exportSupported = exportUnsupported
2019-02-20 19:55:01 +00:00
, importSupported = importUnsupported
add thirdPartyPopulated interface This is to support, eg a borg repo as a special remote, which is populated not by running git-annex commands, but by using borg. Then git-annex sync lists the content of the remote, learns which files are annex objects, and treats those as present in the remote. So, most of the import machinery is reused, to a new purpose. While normally importtree maintains a remote tracking branch, this does not, because the files stored in the remote are annex object files, not user-visible filenames. But, internally, a git tree is still generated, of the files on the remote that are annex objects. This tree is used by retrieveExportWithContentIdentifier, etc. As with other import/export remotes, that the tree is recorded in the export log, and gets grafted into the git-annex branch. importKey changed to be able to return Nothing, to indicate when an ImportLocation is not an annex object and so should be skipped from being included in the tree. It did not seem to make sense to have git-annex import do this, since from the user's perspective, it's not like other imports. So only git-annex sync does it. Note that, git-annex sync does not yet download objects from such remotes that are preferred content. importKeys is run with content downloading disabled, to avoid getting the content of all objects. Perhaps what's needed is for seekSyncContent to be run with these remotes, but I don't know if it will just work (in particular, it needs to avoid trying to transfer objects to them), so I skipped that for now. (Untested and unused as of yet.) This commit was sponsored by Jochen Bartl on Patreon.
2020-12-18 18:52:57 +00:00
, thirdPartyPopulated = False
}
-- There is only one web remote, and it always exists.
-- (If the web should cease to exist, remove this module and redistribute
-- a new release to the survivors by carrier pigeon.)
list :: Bool -> Annex [Git.Repo]
list _autoinit = do
2015-02-12 19:33:05 +00:00
r <- liftIO $ Git.Construct.remoteNamed "web" (pure Git.Construct.fromUnknown)
2011-12-14 19:30:14 +00:00
return [r]
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
gen r _ rc gc rs = do
c <- parsedRemoteConfig remote rc
cst <- remoteCost gc expensiveRemoteCost
2014-12-16 19:26:13 +00:00
return $ Just Remote
{ uuid = webUUID
, cost = cst
2014-12-16 19:26:13 +00:00
, name = Git.repoDescribe r
, storeKey = uploadKey
, retrieveKeyFile = downloadKey
, retrieveKeyFileCheap = Nothing
-- HttpManagerRestricted is used here, so this is
-- secure.
, retrievalSecurityPolicy = RetrievalAllKeysSecure
2014-12-16 19:26:13 +00:00
, removeKey = dropKey
, lockContent = Nothing
2014-12-16 19:26:13 +00:00
, checkPresent = checkKey
, checkPresentCheap = False
, exportActions = exportUnsupported
2019-02-20 19:55:01 +00:00
, importActions = importUnsupported
, whereisKey = Nothing
2014-12-16 19:26:13 +00:00
, remoteFsck = Nothing
, repairRepo = Nothing
, config = c
, gitconfig = gc
, localpath = Nothing
, getRepo = return r
2014-12-16 19:26:13 +00:00
, readonly = True
, appendonly = False
, untrustworthy = False
2014-12-16 19:26:13 +00:00
, availability = GloballyAvailable
, remotetype = remote
, mkUnavailable = return Nothing
, getInfo = return []
, claimUrl = Nothing -- implicitly claims all urls
, checkUrl = Nothing
, remoteStateHandle = rs
2014-12-16 19:26:13 +00:00
}
downloadKey :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
downloadKey key _af dest p vc = go =<< getWebUrls key
2012-11-11 04:51:07 +00:00
where
go [] = giveup "no known url"
go urls = dl (partition (not . isyoutube) (map getDownloader urls)) >>= \case
Just v -> return v
Nothing -> giveup $ unwords
[ "downloading from all"
, show (length urls)
, "known url(s) failed"
]
dl ([], ytus) = flip getM (map fst ytus) $ \u ->
ifM (youtubeDlTo key u dest p)
( return (Just UnVerified)
, return Nothing
)
dl (us, ytus) = do
iv <- startVerifyKeyContentIncrementally vc key
ifM (Url.withUrlOptions $ downloadUrl True key p iv (map fst us) dest)
( finishVerifyKeyContentIncrementally iv >>= \case
(True, v) -> return (Just v)
(False, _) -> dl ([], ytus)
, dl ([], ytus)
)
isyoutube (_, YoutubeDownloader) = True
isyoutube _ = False
uploadKey :: Key -> AssociatedFile -> MeterUpdate -> Annex ()
uploadKey _ _ _ = giveup "upload to web not supported"
2020-05-14 18:08:09 +00:00
dropKey :: Key -> Annex ()
dropKey k = mapM_ (setUrlMissing k) =<< getWebUrls k
checkKey :: Key -> Annex Bool
2011-07-01 21:15:46 +00:00
checkKey key = do
us <- getWebUrls key
if null us
then return False
else either giveup return =<< checkKey' key us
2013-09-09 06:16:22 +00:00
checkKey' :: Key -> [URLString] -> Annex (Either String Bool)
checkKey' key us = firsthit us (Right False) $ \u -> do
let (u', downloader) = getDownloader u
case downloader of
YoutubeDownloader -> youtubeDlCheck u'
_ -> catchMsgIO $
Url.withUrlOptions $ Url.checkBoth u' (fromKey keySize key)
2013-09-09 06:16:22 +00:00
where
firsthit [] miss _ = return miss
2013-09-09 06:16:22 +00:00
firsthit (u:rest) _ a = do
r <- a u
case r of
Right True -> return r
_ -> firsthit rest r a
getWebUrls :: Key -> Annex [URLString]
getWebUrls key = filter supported <$> getUrls key
where
supported u = snd (getDownloader u)
`elem` [WebDownloader, YoutubeDownloader]