From 30bf112185955ae703deecfd85a94c2c6a5a9769 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 8 Dec 2014 19:14:24 -0400 Subject: [PATCH] Urls can now be claimed by remotes. This will allow creating, for example, a external special remote that handles magnet: and *.torrent urls. --- Annex.hs | 3 + Assistant/Upgrade.hs | 4 +- Command/AddUrl.hs | 181 +++++++++++++----- Command/ReKey.hs | 6 +- Command/RmUrl.hs | 7 +- Command/Whereis.hs | 21 +- Logs/Web.hs | 55 ++++-- Remote.hs | 13 +- Remote/Bup.hs | 1 + Remote/Ddar.hs | 1 + Remote/Directory.hs | 3 +- Remote/External.hs | 17 +- Remote/External/Types.hs | 13 ++ Remote/GCrypt.hs | 1 + Remote/Git.hs | 1 + Remote/Glacier.hs | 3 +- Remote/Hook.hs | 3 +- Remote/Rsync.hs | 1 + Remote/S3.hs | 5 +- Remote/Tahoe.hs | 3 +- Remote/Web.hs | 23 ++- Remote/WebDAV.hs | 3 +- Types/Remote.hs | 7 +- debian/changelog | 3 + .../external_special_remote_protocol.mdwn | 24 ++- doc/devblog/day_237__extending_addurl.mdwn | 14 ++ doc/internals.mdwn | 5 +- doc/todo/extensible_addurl.mdwn | 39 +++- 28 files changed, 346 insertions(+), 114 deletions(-) create mode 100644 doc/devblog/day_237__extending_addurl.mdwn diff --git a/Annex.hs b/Annex.hs index a04bfd1bb1..82a378f791 100644 --- a/Annex.hs +++ b/Annex.hs @@ -63,6 +63,7 @@ import Types.CleanupActions import Utility.Quvi (QuviVersion) #endif import Utility.InodeCache +import Utility.Url import "mtl" Control.Monad.Reader import Control.Concurrent @@ -128,6 +129,7 @@ data AnnexState = AnnexState , useragent :: Maybe String , errcounter :: Integer , unusedkeys :: Maybe (S.Set Key) + , tempurls :: M.Map Key URLString #ifdef WITH_QUVI , quviversion :: Maybe QuviVersion #endif @@ -173,6 +175,7 @@ newState c r = AnnexState , useragent = Nothing , errcounter = 0 , unusedkeys = Nothing + , tempurls = M.empty #ifdef WITH_QUVI , quviversion = Nothing #endif diff --git a/Assistant/Upgrade.hs b/Assistant/Upgrade.hs index a05d5e316a..c92cc30f64 100644 --- a/Assistant/Upgrade.hs +++ b/Assistant/Upgrade.hs @@ -80,7 +80,7 @@ startDistributionDownload d = go =<< liftIO . newVersionLocation d =<< liftIO ol where go Nothing = debug ["Skipping redundant upgrade"] go (Just dest) = do - liftAnnex $ setUrlPresent k u + liftAnnex $ setUrlPresent webUUID k u hook <- asIO1 $ distributionDownloadComplete d dest cleanup modifyDaemonStatus_ $ \s -> s { transferHook = M.insert k hook (transferHook s) } @@ -97,7 +97,7 @@ startDistributionDownload d = go =<< liftIO . newVersionLocation d =<< liftIO ol } cleanup = liftAnnex $ do lockContent k removeAnnex - setUrlMissing k u + setUrlMissing webUUID k u logStatus k InfoMissing {- Called once the download is done. diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 81da67639e..76095d6e4a 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2011-2013 Joey Hess + - Copyright 2011-2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -19,6 +19,8 @@ import qualified Annex import qualified Annex.Queue import qualified Annex.Url as Url import qualified Backend.URL +import qualified Remote +import qualified Types.Remote as Remote import Annex.Content import Logs.Web import Types.Key @@ -26,6 +28,7 @@ import Types.KeySource import Config import Annex.Content.Direct import Logs.Location +import Utility.Metered import qualified Annex.Transfer as Transfer #ifdef WITH_QUVI import Annex.Quvi @@ -54,7 +57,71 @@ seek ps = do withStrings (start relaxed f d) ps start :: Bool -> Maybe FilePath -> Maybe Int -> String -> CommandStart -start relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s +start relaxed optfile pathdepth s = do + r <- Remote.claimingUrl s + if Remote.uuid r == webUUID + then startWeb relaxed optfile pathdepth s + else startRemote r relaxed optfile pathdepth s + +startRemote :: Remote -> Bool -> Maybe FilePath -> Maybe Int -> String -> CommandStart +startRemote r relaxed optfile pathdepth s = do + url <- case Url.parseURIRelaxed s of + Nothing -> error $ "bad uri " ++ s + Just u -> pure u + pathmax <- liftIO $ fileNameLengthLimit "." + let file = choosefile $ url2file url pathdepth pathmax + showStart "addurl" file + showNote $ "using " ++ Remote.name r + next $ performRemote r relaxed s file + where + choosefile = flip fromMaybe optfile + +performRemote :: Remote -> Bool -> URLString -> FilePath -> CommandPerform +performRemote r relaxed uri file = ifAnnexed file adduri geturi + where + loguri = setDownloader uri OtherDownloader + adduri = addUrlChecked relaxed loguri (Remote.uuid r) checkexistssize + checkexistssize key = do + res <- tryNonAsync $ Remote.checkUrl r uri + case res of + Left e -> do + warning (show e) + return (False, False) + Right Nothing -> + return (True, True) + Right (Just sz) -> + return (True, sz == fromMaybe sz (keySize key)) + geturi = do + dummykey <- Backend.URL.fromUrl uri =<< + if relaxed + then return Nothing + else Remote.checkUrl r uri + liftIO $ createDirectoryIfMissing True (parentDir file) + next $ ifM (Annex.getState Annex.fast <||> pure relaxed) + ( do + res <- tryNonAsync $ Remote.checkUrl r uri + case res of + Left e -> do + warning (show e) + return False + Right size -> do + key <- Backend.URL.fromUrl uri size + cleanup (Remote.uuid r) loguri file key Nothing + return True + , do + -- Set temporary url for the dummy key + -- so that the remote knows what url it + -- should use to download it. + setTempUrl dummykey uri + let downloader = Remote.retrieveKeyFile r dummykey (Just file) + ok <- isJust <$> + downloadWith downloader dummykey (Remote.uuid r) loguri file + removeTempUrl dummykey + return ok + ) + +startWeb :: Bool -> Maybe FilePath -> Maybe Int -> String -> CommandStart +startWeb relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s where (s', downloader) = getDownloader s bad = fromMaybe (error $ "bad url " ++ s') $ @@ -62,7 +129,7 @@ start relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s choosefile = flip fromMaybe optfile go url = case downloader of QuviDownloader -> usequvi - DefaultDownloader -> + _ -> #ifdef WITH_QUVI ifM (quviSupported s') ( usequvi @@ -75,7 +142,7 @@ start relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s pathmax <- liftIO $ fileNameLengthLimit "." let file = choosefile $ url2file url pathdepth pathmax showStart "addurl" file - next $ perform relaxed s' file + next $ performWeb relaxed s' file #ifdef WITH_QUVI badquvi = error $ "quvi does not know how to download url " ++ s' usequvi = do @@ -96,7 +163,9 @@ performQuvi :: Bool -> URLString -> URLString -> FilePath -> CommandPerform performQuvi relaxed pageurl videourl file = ifAnnexed file addurl geturl where quviurl = setDownloader pageurl QuviDownloader - addurl key = next $ cleanup quviurl file key Nothing + addurl key = next $ do + cleanup webUUID quviurl file key Nothing + return True geturl = next $ isJust <$> addUrlFileQuvi relaxed quviurl videourl file #endif @@ -106,7 +175,7 @@ addUrlFileQuvi relaxed quviurl videourl file = do key <- Backend.URL.fromUrl quviurl Nothing ifM (pure relaxed <||> Annex.getState Annex.fast) ( do - cleanup' quviurl file key Nothing + cleanup webUUID quviurl file key Nothing return (Just key) , do {- Get the size, and use that to check @@ -124,55 +193,65 @@ addUrlFileQuvi relaxed quviurl videourl file = do downloadUrl [videourl] tmp if ok then do - cleanup' quviurl file key (Just tmp) + cleanup webUUID quviurl file key (Just tmp) return (Just key) else return Nothing ) #endif -perform :: Bool -> URLString -> FilePath -> CommandPerform -perform relaxed url file = ifAnnexed file addurl geturl +performWeb :: Bool -> URLString -> FilePath -> CommandPerform +performWeb relaxed url file = ifAnnexed file addurl geturl where geturl = next $ isJust <$> addUrlFile relaxed url file - addurl key - | relaxed = do - setUrlPresent key url - next $ return True - | otherwise = ifM (elem url <$> getUrls key) - ( stop - , do - (exists, samesize) <- Url.withUrlOptions $ Url.check url (keySize key) - if exists && samesize - then do - setUrlPresent key url - next $ return True - else do - warning $ "while adding a new url to an already annexed file, " ++ if exists - then "url does not have expected file size (use --relaxed to bypass this check) " ++ url - else "failed to verify url exists: " ++ url - stop - ) + addurl = addUrlChecked relaxed url webUUID checkexistssize + checkexistssize = Url.withUrlOptions . Url.check url . keySize + +addUrlChecked :: Bool -> URLString -> UUID -> (Key -> Annex (Bool, Bool)) -> Key -> CommandPerform +addUrlChecked relaxed url u checkexistssize key + | relaxed = do + setUrlPresent u key url + next $ return True + | otherwise = ifM (elem url <$> getUrls key) + ( stop + , do + (exists, samesize) <- checkexistssize key + if exists && samesize + then do + setUrlPresent u key url + next $ return True + else do + warning $ "while adding a new url to an already annexed file, " ++ if exists + then "url does not have expected file size (use --relaxed to bypass this check) " ++ url + else "failed to verify url exists: " ++ url + stop + ) addUrlFile :: Bool -> URLString -> FilePath -> Annex (Maybe Key) addUrlFile relaxed url file = do liftIO $ createDirectoryIfMissing True (parentDir file) ifM (Annex.getState Annex.fast <||> pure relaxed) ( nodownload relaxed url file - , do - showAction $ "downloading " ++ url ++ " " - download url file + , downloadWeb url file ) -download :: URLString -> FilePath -> Annex (Maybe Key) -download url file = do - {- Generate a dummy key to use for this download, before we can - - examine the file and find its real key. This allows resuming - - downloads, as the dummy key for a given url is stable. -} +downloadWeb :: URLString -> FilePath -> Annex (Maybe Key) +downloadWeb url file = do dummykey <- addSizeUrlKey url =<< Backend.URL.fromUrl url Nothing + let downloader f _ = do + showOutput + downloadUrl [url] f + showAction $ "downloading " ++ url ++ " " + downloadWith downloader dummykey webUUID url file + +{- The Key should be a dummy key, based on the URL, which is used + - for this download, before we can examine the file and find its real key. + - For resuming downloads to work, the dummy key for a given url should be + - stable. -} +downloadWith :: (FilePath -> MeterUpdate -> Annex Bool) -> Key -> UUID -> URLString -> FilePath -> Annex (Maybe Key) +downloadWith downloader dummykey u url file = prepGetViaTmpChecked dummykey Nothing $ do tmp <- fromRepo $ gitAnnexTmpObjectLocation dummykey - showOutput - ifM (runtransfer dummykey tmp) + ifM (runtransfer tmp) ( do backend <- chooseBackend file let source = KeySource @@ -184,15 +263,15 @@ download url file = do case k of Nothing -> return Nothing Just (key, _) -> do - cleanup' url file key (Just tmp) + cleanup u url file key (Just tmp) return (Just key) , return Nothing ) where - runtransfer dummykey tmp = Transfer.notifyTransfer Transfer.Download (Just file) $ - Transfer.download webUUID dummykey (Just file) Transfer.forwardRetry $ const $ do + runtransfer tmp = Transfer.notifyTransfer Transfer.Download (Just file) $ + Transfer.download webUUID dummykey (Just file) Transfer.forwardRetry $ \p -> do liftIO $ createDirectoryIfMissing True (parentDir tmp) - downloadUrl [url] tmp + downloader tmp p {- Hits the url to get the size, if available. - @@ -204,16 +283,11 @@ addSizeUrlKey url key = do size <- snd <$> Url.withUrlOptions (Url.exists url) return $ key { keySize = size } -cleanup :: URLString -> FilePath -> Key -> Maybe FilePath -> Annex Bool -cleanup url file key mtmp = do - cleanup' url file key mtmp - return True - -cleanup' :: URLString -> FilePath -> Key -> Maybe FilePath -> Annex () -cleanup' url file key mtmp = do +cleanup :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex () +cleanup u url file key mtmp = do when (isJust mtmp) $ logStatus key InfoPresent - setUrlPresent key url + setUrlPresent u key url Command.Add.addLink file key Nothing whenM isDirect $ do void $ addAssociatedFile key file @@ -230,7 +304,7 @@ nodownload relaxed url file = do if exists then do key <- Backend.URL.fromUrl url size - cleanup' url file key Nothing + cleanup webUUID url file key Nothing return (Just key) else do warning $ "unable to access url: " ++ url @@ -245,8 +319,11 @@ url2file url pathdepth pathmax = case pathdepth of | depth < 0 -> frombits $ reverse . take (negate depth) . reverse | otherwise -> error "bad --pathdepth" where - fullurl = uriRegName auth ++ uriPath url ++ uriQuery url + fullurl = concat + [ maybe "" uriRegName (uriAuthority url) + , uriPath url + , uriQuery url + ] frombits a = intercalate "/" $ a urlbits urlbits = map (truncateFilePath pathmax . sanitizeFilePath) $ filter (not . null) $ split "/" fullurl - auth = fromMaybe (error $ "bad url " ++ show url) $ uriAuthority url diff --git a/Command/ReKey.hs b/Command/ReKey.hs index a203ab8d5b..5dadf4e60c 100644 --- a/Command/ReKey.hs +++ b/Command/ReKey.hs @@ -16,6 +16,7 @@ import qualified Command.Add import Logs.Web import Logs.Location import Utility.CopyFile +import qualified Remote cmd :: [Command] cmd = [notDirect $ command "rekey" @@ -61,8 +62,9 @@ cleanup file oldkey newkey = do -- If the old key had some associated urls, record them for -- the new key as well. urls <- getUrls oldkey - unless (null urls) $ - mapM_ (setUrlPresent newkey) urls + forM_ urls $ \url -> do + r <- Remote.claimingUrl url + setUrlPresent (Remote.uuid r) newkey url -- Update symlink to use the new key. liftIO $ removeFile file diff --git a/Command/RmUrl.hs b/Command/RmUrl.hs index 1582d0f3fd..5700042665 100644 --- a/Command/RmUrl.hs +++ b/Command/RmUrl.hs @@ -10,6 +10,7 @@ module Command.RmUrl where import Common.Annex import Command import Logs.Web +import qualified Remote cmd :: [Command] cmd = [notBareRepo $ @@ -26,5 +27,9 @@ start (file, url) = flip whenAnnexed file $ \_ key -> do cleanup :: String -> Key -> CommandCleanup cleanup url key = do - setUrlMissing key url + r <- Remote.claimingUrl url + let url' = if Remote.uuid r == webUUID + then url + else setDownloader url OtherDownloader + setUrlMissing (Remote.uuid r) key url' return True diff --git a/Command/Whereis.hs b/Command/Whereis.hs index 582aaffc2d..314c204beb 100644 --- a/Command/Whereis.hs +++ b/Command/Whereis.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010 Joey Hess + - Copyright 2010-2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -13,6 +13,7 @@ import Common.Annex import Command import Remote import Logs.Trust +import Logs.Web cmd :: [Command] cmd = [noCommit $ withOptions (jsonOption : keyOptions) $ @@ -57,9 +58,17 @@ perform remotemap key = do untrustedheader = "The following untrusted locations may also have copies:\n" performRemote :: Key -> Remote -> Annex () -performRemote key remote = maybe noop go $ whereisKey remote +performRemote key remote = do + ls <- (++) + <$> askremote + <*> claimedurls + unless (null ls) $ showLongNote $ unlines $ + map (\l -> name remote ++ ": " ++ l) ls where - go a = do - ls <- a key - unless (null ls) $ showLongNote $ unlines $ - map (\l -> name remote ++ ": " ++ l) ls + askremote = maybe (pure []) (flip id key) (whereisKey remote) + claimedurls = do + us <- map fst + . filter (\(_, d) -> d == OtherDownloader) + . map getDownloader + <$> getUrls key + filterM (\u -> (==) <$> pure remote <*> claimingUrl u) us diff --git a/Logs/Web.hs b/Logs/Web.hs index 19a3084efe..c3e5c34329 100644 --- a/Logs/Web.hs +++ b/Logs/Web.hs @@ -1,6 +1,6 @@ {- Web url logs. - - - Copyright 2011, 2013 Joey Hess + - Copyright 2011-2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -16,12 +16,16 @@ module Logs.Web ( Downloader(..), getDownloader, setDownloader, + setTempUrl, + removeTempUrl, ) where import qualified Data.ByteString.Lazy.Char8 as L +import qualified Data.Map as M import Data.Tuple.Utils import Common.Annex +import qualified Annex import Logs import Logs.Presence import Logs.Location @@ -37,7 +41,10 @@ webUUID = UUID "00000000-0000-0000-0000-000000000001" {- Gets all urls that a key might be available from. -} getUrls :: Key -> Annex [URLString] -getUrls key = go $ urlLogFile key : oldurlLogs key +getUrls key = do + l <- go $ urlLogFile key : oldurlLogs key + tmpl <- Annex.getState (maybeToList . M.lookup key . Annex.tempurls) + return (tmpl ++ l) where go [] = return [] go (l:ls) = do @@ -49,19 +56,18 @@ getUrls key = go $ urlLogFile key : oldurlLogs key getUrlsWithPrefix :: Key -> String -> Annex [URLString] getUrlsWithPrefix key prefix = filter (prefix `isPrefixOf`) <$> getUrls key -setUrlPresent :: Key -> URLString -> Annex () -setUrlPresent key url = do +setUrlPresent :: UUID -> Key -> URLString -> Annex () +setUrlPresent uuid key url = do us <- getUrls key unless (url `elem` us) $ do addLog (urlLogFile key) =<< logNow InfoPresent url - -- update location log to indicate that the web has the key - logChange key webUUID InfoPresent + logChange key uuid InfoPresent -setUrlMissing :: Key -> URLString -> Annex () -setUrlMissing key url = do +setUrlMissing :: UUID -> Key -> URLString -> Annex () +setUrlMissing uuid key url = do addLog (urlLogFile key) =<< logNow InfoMissing url whenM (null <$> getUrls key) $ - logChange key webUUID InfoMissing + logChange key uuid InfoMissing {- Finds all known urls. -} knownUrls :: Annex [URLString] @@ -81,18 +87,27 @@ knownUrls = do geturls Nothing = return [] geturls (Just logsha) = getLog . L.unpack <$> catObject logsha -data Downloader = DefaultDownloader | QuviDownloader +setTempUrl :: Key -> URLString -> Annex () +setTempUrl key url = Annex.changeState $ \s -> + s { Annex.tempurls = M.insert key url (Annex.tempurls s) } + +removeTempUrl :: Key -> Annex () +removeTempUrl key = Annex.changeState $ \s -> + s { Annex.tempurls = M.delete key (Annex.tempurls s) } + +data Downloader = WebDownloader | QuviDownloader | OtherDownloader + deriving (Eq) + +{- To keep track of how an url is downloaded, it's mangled slightly in + - the log. For quvi, "quvi:" is prefixed. For urls that are handled by + - some other remote, ":" is prefixed. -} +setDownloader :: URLString -> Downloader -> String +setDownloader u WebDownloader = u +setDownloader u QuviDownloader = "quvi:" ++ u +setDownloader u OtherDownloader = ":" ++ u -{- Determines the downloader for an URL. - - - - Some URLs are not downloaded by normal means, and this is indicated - - by prefixing them with downloader: when they are recorded in the url - - logs. -} getDownloader :: URLString -> (URLString, Downloader) getDownloader u = case separate (== ':') u of ("quvi", u') -> (u', QuviDownloader) - _ -> (u, DefaultDownloader) - -setDownloader :: URLString -> Downloader -> URLString -setDownloader u DefaultDownloader = u -setDownloader u QuviDownloader = "quvi:" ++ u + ("", u') -> (u', OtherDownloader) + _ -> (u, WebDownloader) diff --git a/Remote.hs b/Remote.hs index 37dfafa1fc..65e725338f 100644 --- a/Remote.hs +++ b/Remote.hs @@ -45,7 +45,8 @@ module Remote ( forceTrust, logStatus, checkAvailable, - isXMPPRemote + isXMPPRemote, + claimingUrl, ) where import qualified Data.Map as M @@ -60,6 +61,7 @@ import Annex.UUID import Logs.UUID import Logs.Trust import Logs.Location hiding (logStatus) +import Logs.Web import Remote.List import Config import Git.Types (RemoteName) @@ -318,3 +320,12 @@ hasKey r k = either (Left . show) Right <$> tryNonAsync (checkPresent r k) hasKeyCheap :: Remote -> Bool hasKeyCheap = checkPresentCheap + +{- The web special remote claims urls by default. -} +claimingUrl :: URLString -> Annex Remote +claimingUrl url = do + rs <- remoteList + let web = Prelude.head $ filter (\r -> uuid r == webUUID) rs + fromMaybe web <$> firstM checkclaim rs + where + checkclaim = maybe (pure False) (flip id url) . claimUrl diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 8744aa3573..405ce3056c 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -75,6 +75,7 @@ gen r u c gc = do , mkUnavailable = return Nothing , getInfo = return [("repo", buprepo)] , claimUrl = Nothing + , checkUrl = const $ return Nothing } return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store this buprepo) diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index a57f5f6c32..1b8003dd8a 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -72,6 +72,7 @@ gen r u c gc = do , mkUnavailable = return Nothing , getInfo = return [("repo", ddarrepo)] , claimUrl = Nothing + , checkUrl = const $ return Nothing } ddarrepo = fromMaybe (error "missing ddarrepo") $ remoteAnnexDdarRepo gc specialcfg = (specialRemoteCfg c) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index d83ab2dae1..fec40baa81 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -69,7 +69,8 @@ gen r u c gc = do mkUnavailable = gen r u c $ gc { remoteAnnexDirectory = Just "/dev/null" }, getInfo = return [("directory", dir)], - claimUrl = Nothing + claimUrl = Nothing, + checkUrl = const $ return Nothing } where dir = fromMaybe (error "missing directory") $ remoteAnnexDirectory gc diff --git a/Remote/External.hs b/Remote/External.hs index 97aa247ba6..b6928a8274 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -70,7 +70,8 @@ gen r u c gc = do mkUnavailable = gen r u c $ gc { remoteAnnexExternalType = Just "!dne!" }, getInfo = return [("externaltype", externaltype)], - claimUrl = Just (claimurl external) + claimUrl = Just (claimurl external), + checkUrl = checkurl external } where externaltype = fromMaybe (error "missing externaltype") (remoteAnnexExternalType gc) @@ -217,8 +218,10 @@ handleRequest' lck external req mp responsehandler state <- fromMaybe "" <$> getRemoteState (externalUUID external) key send $ VALUE state - handleRemoteRequest (SETURLPRESENT key url) = setUrlPresent key url - handleRemoteRequest (SETURLMISSING key url) = setUrlMissing key url + handleRemoteRequest (SETURLPRESENT key url) = + setUrlPresent (externalUUID external) key url + handleRemoteRequest (SETURLMISSING key url) = + setUrlMissing (externalUUID external) key url handleRemoteRequest (GETURLS key prefix) = do mapM_ (send . VALUE) =<< getUrlsWithPrefix key prefix send (VALUE "") -- end of list @@ -425,3 +428,11 @@ claimurl external url = UNSUPPORTED_REQUEST -> Just $ return False _ -> Nothing +checkurl :: External -> URLString -> Annex (Maybe Integer) +checkurl external url = + handleRequest external (CHECKURL url) Nothing $ \req -> case req of + CHECKURL_SIZE sz -> Just $ return $ Just sz + CHECKURL_SIZEUNKNOWN -> Just $ return Nothing + CHECKURL_FAILURE errmsg -> Just $ error errmsg + UNSUPPORTED_REQUEST -> error "CHECKURL not implemented by external special remote" + _ -> Nothing diff --git a/Remote/External/Types.hs b/Remote/External/Types.hs index 2fc29e5b46..b00352702b 100644 --- a/Remote/External/Types.hs +++ b/Remote/External/Types.hs @@ -92,6 +92,7 @@ data Request | GETCOST | GETAVAILABILITY | CLAIMURL URLString + | CHECKURL URLString | TRANSFER Direction Key FilePath | CHECKPRESENT Key | REMOVE Key @@ -109,6 +110,7 @@ instance Proto.Sendable Request where formatMessage GETCOST = ["GETCOST"] formatMessage GETAVAILABILITY = ["GETAVAILABILITY"] formatMessage (CLAIMURL url) = [ "CLAIMURL", Proto.serialize url ] + formatMessage (CHECKURL url) = [ "CHECKURL", Proto.serialize url ] formatMessage (TRANSFER direction key file) = [ "TRANSFER" , Proto.serialize direction @@ -135,6 +137,9 @@ data Response | INITREMOTE_FAILURE ErrorMsg | CLAIMURL_SUCCESS | CLAIMURL_FAILURE + | CHECKURL_SIZE Size + | CHECKURL_SIZEUNKNOWN + | CHECKURL_FAILURE ErrorMsg | UNSUPPORTED_REQUEST deriving (Show) @@ -154,6 +159,9 @@ instance Proto.Receivable Response where parseCommand "INITREMOTE-FAILURE" = Proto.parse1 INITREMOTE_FAILURE parseCommand "CLAIMURL-SUCCESS" = Proto.parse0 CLAIMURL_SUCCESS parseCommand "CLAIMURL-FAILURE" = Proto.parse0 CLAIMURL_FAILURE + parseCommand "CHECKURL-SIZE" = Proto.parse1 CHECKURL_SIZE + parseCommand "CHECKURL-SIZEUNKNOWN" = Proto.parse0 CHECKURL_SIZEUNKNOWN + parseCommand "CHECKURL-FAILURE" = Proto.parse1 CHECKURL_FAILURE parseCommand "UNSUPPORTED-REQUEST" = Proto.parse0 UNSUPPORTED_REQUEST parseCommand _ = Proto.parseFail @@ -225,6 +233,7 @@ instance Proto.Receivable AsyncMessage where type ErrorMsg = String type Setting = String type ProtocolVersion = Int +type Size = Integer supportedProtocolVersions :: [ProtocolVersion] supportedProtocolVersions = [1] @@ -253,6 +262,10 @@ instance Proto.Serializable Cost where serialize = show deserialize = readish +instance Proto.Serializable Size where + serialize = show + deserialize = readish + instance Proto.Serializable Availability where serialize GloballyAvailable = "GLOBAL" serialize LocallyAvailable = "LOCAL" diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 43e3d8b169..6bf99c1351 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -123,6 +123,7 @@ gen' r u c gc = do , mkUnavailable = return Nothing , getInfo = return $ gitRepoInfo r , claimUrl = Nothing + , checkUrl = const $ return Nothing } return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store this rsyncopts) diff --git a/Remote/Git.hs b/Remote/Git.hs index fdadac2d64..74fb81965a 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -161,6 +161,7 @@ gen r u c gc , mkUnavailable = unavailable r u c gc , getInfo = return $ gitRepoInfo r , claimUrl = Nothing + , checkUrl = const $ return Nothing } unavailable :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 5484a0d2fe..17f7550007 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -69,7 +69,8 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost mkUnavailable = return Nothing, getInfo = includeCredsInfo c (AWS.creds u) $ [ ("glacier vault", getVault c) ], - claimUrl = Nothing + claimUrl = Nothing, + checkUrl = const $ return Nothing } specialcfg = (specialRemoteCfg c) -- Disabled until jobList gets support for chunks. diff --git a/Remote/Hook.hs b/Remote/Hook.hs index a84ee85547..09297a6e2c 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -62,7 +62,8 @@ gen r u c gc = do mkUnavailable = gen r u c $ gc { remoteAnnexHookType = Just "!dne!" }, getInfo = return [("hooktype", hooktype)], - claimUrl = Nothing + claimUrl = Nothing, + checkUrl = const $ return Nothing } where hooktype = fromMaybe (error "missing hooktype") $ remoteAnnexHookType gc diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 6e71cb2bbb..7a7f68165a 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -85,6 +85,7 @@ gen r u c gc = do , mkUnavailable = return Nothing , getInfo = return [("url", url)] , claimUrl = Nothing + , checkUrl = const $ return Nothing } where specialcfg = (specialRemoteCfg c) diff --git a/Remote/S3.hs b/Remote/S3.hs index 42f4f1ffb4..f569047294 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -93,7 +93,8 @@ gen r u c gc = do else Nothing , Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c)) ], - claimUrl = Nothing + claimUrl = Nothing, + checkUrl = const $ return Nothing } s3Setup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) @@ -163,7 +164,7 @@ store r h = fileStorer $ \k f p -> do _ -> singlepartupload k f p -- Store public URL to item in Internet Archive. when (isIA (hinfo h) && not (isChunkKey k)) $ - setUrlPresent k (iaKeyUrl r k) + setUrlPresent webUUID k (iaKeyUrl r k) return True where singlepartupload k f p = do diff --git a/Remote/Tahoe.hs b/Remote/Tahoe.hs index 8df590f578..8b56bbd502 100644 --- a/Remote/Tahoe.hs +++ b/Remote/Tahoe.hs @@ -86,7 +86,8 @@ gen r u c gc = do remotetype = remote, mkUnavailable = return Nothing, getInfo = return [], - claimUrl = Nothing + claimUrl = Nothing, + checkUrl = const $ return Nothing } tahoeSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) diff --git a/Remote/Web.hs b/Remote/Web.hs index 6ddf1a45ae..3845dddf53 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -1,4 +1,4 @@ -{- Web remotes. +{- Web remote. - - Copyright 2011 Joey Hess - @@ -52,7 +52,7 @@ gen r _ c gc = removeKey = dropKey, checkPresent = checkKey, checkPresentCheap = False, - whereisKey = Just getUrls, + whereisKey = Just getWebUrls, remoteFsck = Nothing, repairRepo = Nothing, config = c, @@ -64,11 +64,12 @@ gen r _ c gc = remotetype = remote, mkUnavailable = return Nothing, getInfo = return [], - claimUrl = Nothing -- implicitly claims all urls + claimUrl = Nothing, -- implicitly claims all urls + checkUrl = const $ return Nothing } downloadKey :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -downloadKey key _file dest _p = get =<< getUrls key +downloadKey key _file dest _p = get =<< getWebUrls key where get [] = do warning "no known url" @@ -86,7 +87,7 @@ downloadKey key _file dest _p = get =<< getUrls key warning "quvi support needed for this url" return False #endif - DefaultDownloader -> downloadUrl [u'] dest + _ -> downloadUrl [u'] dest downloadKeyCheap :: Key -> FilePath -> Annex Bool downloadKeyCheap _ _ = return False @@ -98,12 +99,12 @@ uploadKey _ _ _ = do dropKey :: Key -> Annex Bool dropKey k = do - mapM_ (setUrlMissing k) =<< getUrls k + mapM_ (setUrlMissing webUUID k) =<< getWebUrls k return True checkKey :: Key -> Annex Bool checkKey key = do - us <- getUrls key + us <- getWebUrls key if null us then return False else either error return =<< checkKey' key us @@ -118,7 +119,7 @@ checkKey' key us = firsthit us (Right False) $ \u -> do #else return $ Left "quvi support needed for this url" #endif - DefaultDownloader -> do + _ -> do Url.withUrlOptions $ catchMsgIO . Url.checkBoth u' (keySize key) where @@ -128,3 +129,9 @@ checkKey' key us = firsthit us (Right False) $ \u -> do case r of Right _ -> return r Left _ -> firsthit rest r a + +getWebUrls :: Key -> Annex [URLString] +getWebUrls key = filter supported <$> getUrls key + where + supported u = snd (getDownloader u) + `elem` [WebDownloader, QuviDownloader] diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 6b56acca6e..57e1dd7856 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -74,7 +74,8 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost mkUnavailable = gen r u (M.insert "url" "http://!dne!/" c) gc, getInfo = includeCredsInfo c (davCreds u) $ [("url", fromMaybe "unknown" (M.lookup "url" c))], - claimUrl = Nothing + claimUrl = Nothing, + checkUrl = const $ return Nothing } chunkconfig = getChunkConfig c diff --git a/Types/Remote.hs b/Types/Remote.hs index 3f71e1fb48..baa857906d 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -103,7 +103,12 @@ data RemoteA a = Remote { -- Information about the remote, for git annex info to display. getInfo :: a [(String, String)], -- Some remotes can download from an url (or uri). - claimUrl :: Maybe (URLString -> a Bool) + claimUrl :: Maybe (URLString -> a Bool), + -- Checks that the url is accessible, and gets the size of its + -- content. Returns Nothing if the url is accessible, but + -- its size cannot be determined inexpensively. + -- Throws an exception if the url is inaccessible. + checkUrl :: URLString -> a (Maybe Integer) } instance Show (RemoteA a) where diff --git a/debian/changelog b/debian/changelog index 270d3187f9..39f165d6b6 100644 --- a/debian/changelog +++ b/debian/changelog @@ -4,6 +4,9 @@ git-annex (5.20141204) UNRELEASED; urgency=medium Thanks, Jon Ander PeƱalba. * External special remote protocol now includes commands for setting and getting the urls associated with a key. + * Urls can now be claimed by remotes. This will allow creating, + for example, a external special remote that handles magnet: and + *.torrent urls. -- Joey Hess Fri, 05 Dec 2014 13:42:08 -0400 diff --git a/doc/design/external_special_remote_protocol.mdwn b/doc/design/external_special_remote_protocol.mdwn index 332cc37b17..6c06fd9020 100644 --- a/doc/design/external_special_remote_protocol.mdwn +++ b/doc/design/external_special_remote_protocol.mdwn @@ -125,10 +125,16 @@ replying with `UNSUPPORTED-REQUEST` is acceptable. If the remote replies with `UNSUPPORTED-REQUEST`, its availability is assumed to be global. So, only remotes that are only reachable locally need to worry about implementing this. -* `CLAIMURL Value` +* `CLAIMURL Url` Asks the remote if it wishes to claim responsibility for downloading an url. If so, the remote should send back an `CLAIMURL-SUCCESS` reply. If not, it can send `CLAIMURL-FAILURE`. +* `CHECKURL Url` + Asks the remote to check if the url's content can currently be downloaded + (without downloading it). If the url is not accessible, send + `CHECKURL-FAILURE`. If the url is accessible and the size is known, + send the size in `CHECKURL-SIZE`. If the url is accessible, but the size + is unknown, send `CHECKURL-SIZEUNOWN`. More optional requests may be added, without changing the protocol version, so if an unknown request is seen, reply with `UNSUPPORTED-REQUEST`. @@ -175,6 +181,14 @@ while it's handling a request. Indicates that the CLAIMURL url will be handled by this remote. * `CLAIMURL-FAILURE` Indicates that the CLAIMURL url wil not be handled by this remote. +* `CHECKURL-SIZE Size` + Indicates that the requested url has been verified to exist, + and its size is known. The size is in bytes. +* `CHECKURL-SIZEUNKNOWN` + Indicates that the requested url has been verified to exist, + but its size could not be determined. +* `CHECKURL-FAILURE` + Indicates that the requested url could not be accessed. * `UNSUPPORTED-REQUEST` Indicates that the special remote does not know how to handle a request. @@ -255,14 +269,14 @@ in control. * `GETSTATE Key` Gets any state that has been stored for the key. (git-annex replies with VALUE followed by the state.) -* `SETURLPRESENT Key Value` +* `SETURLPRESENT Key Url` Records an url (or uri) where the Key can be downloaded from. -* `SETURLMISSING Key Value` +* `SETURLMISSING Key Url` Records that the key can no longer be downloaded from the specified url (or uri). -* `GETURLS Key Value` +* `GETURLS Key Prefix` Gets the recorded urls where a Key can be downloaded from. - Only urls that start with the Value will be returned. The Value + Only urls that start with the Prefix will be returned. The Prefix may be empty to get all urls. (git-annex replies one or more times with VALUE for each url. The final VALUE has an empty value, indicating the end of the url list.) diff --git a/doc/devblog/day_237__extending_addurl.mdwn b/doc/devblog/day_237__extending_addurl.mdwn new file mode 100644 index 0000000000..e0129398ec --- /dev/null +++ b/doc/devblog/day_237__extending_addurl.mdwn @@ -0,0 +1,14 @@ +Worked on [[todo/extensible_addurl]] today. When `git annex addurl` is run, +remotes will be asked if they claim the url, and whichever remote does will +be used to download it, and location tracking will indicate that remote +contains the object. This is a masive 1000 line patch touching 30 files, +including follow-on changes in `rmurl` and `whereis` and even `rekey`. + +It should now be possible to build an external special remote that handles +*.torrent and magnet: urls and passes them off to a bittorrent client for +download, for example. + +Another use for this would be to make an external special remote that +uses youtube-dl or some other program than quvi for downloading web videos. +The builtin quvi support could probably be moved out of the web special +remote, to a separate remote. I haven't tried to do that yet. diff --git a/doc/internals.mdwn b/doc/internals.mdwn index 9970a0bbdc..b8cb559b64 100644 --- a/doc/internals.mdwn +++ b/doc/internals.mdwn @@ -182,8 +182,9 @@ Example: ## `aaa/bbb/*.log.web` These log files record urls used by the -[[web_special_remote|special_remotes/web]]. Their format is similar -to the location tracking files, but with urls rather than UUIDs. +[[web_special_remote|special_remotes/web]] and sometimes by other remotes. +Their format is similar to the location tracking files, but with urls +rather than UUIDs. ## `aaa/bbb/*.log.rmt` diff --git a/doc/todo/extensible_addurl.mdwn b/doc/todo/extensible_addurl.mdwn index e9a8d070ab..44b19fef05 100644 --- a/doc/todo/extensible_addurl.mdwn +++ b/doc/todo/extensible_addurl.mdwn @@ -25,11 +25,40 @@ Solution: Add a new method to remotes: claimUrl :: Maybe (URLString -> Annex Bool) Remotes that implement this method (including special remotes) will -be queried when such an uri is added, to see which claims it. Once the -remote is known, addurl will record that the Key is present on that remote, -and record the uri in the url log. +be queried when such an uri is added, to see which claims it. -Then retrieval of the Key works more or less as usual. The only +Once the remote is known, addurl --file will record that the Key is present +on that remote, and record the uri in the url log. + +---- + +What about using addurl to add a new file? In this mode, the Key is not yet +known. addurl currently handles this by generating a dummy Key for the url +(hitting the url to get its size), and running a Transfer using the dummy +key that downloads from the web. Once the download is done, the dummy Key +is upgraded to the final Key. + +Something similar could be done for other remotes, but the url log for the +dummy key would need to have the url added to it, for the remote to know +what to download, and then that could be removed after the download. Which +causes ugly churn in git, and would leave a mess if interrupted. + +One option is to add another new method to remotes: + + downloadUrl :: Maybe (URLString -> Annex FilePath) + +Or, the url log could have support added for recording temporary key +urls in memory. (done) + +Another problem is that the size of the Key isn't known. addurl +could always operate in relaxed mode, where it generates a size-less Key. +Or, yet another method could be added: (done) + + sizeUrl :: URLString -> Annex (Maybe Integer) + +---- + +Retrieval of the Key works more or less as usual. The only difference being that remotes that support this interface can look at the url log to find the one with the right "$downloader:" prefix, and so know where to download from. (Much as the web special remote already @@ -55,3 +84,5 @@ This could be implemented in either the web special remote or even in an external special remote. Some other discussion at + +> [[done]]! --[[Joey]]