From 2cd84fcc8b99d6cc5def7dcbd809b82dfc3b31ec Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 11 Dec 2014 15:32:42 -0400 Subject: [PATCH] Expand checkurl to support recommended filename, and multi-file-urls This commit was sponsored by an anonymous bitcoiner. --- Command/AddUrl.hs | 88 +++++++++---------- Remote/Bup.hs | 2 +- Remote/Ddar.hs | 2 +- Remote/Directory.hs | 2 +- Remote/External.hs | 12 ++- Remote/External/Types.hs | 25 ++++-- Remote/GCrypt.hs | 2 +- Remote/Git.hs | 2 +- Remote/Glacier.hs | 2 +- Remote/Hook.hs | 2 +- Remote/Rsync.hs | 2 +- Remote/S3.hs | 2 +- Remote/Tahoe.hs | 2 +- Remote/Web.hs | 2 +- Remote/WebDAV.hs | 2 +- Types/Remote.hs | 8 +- Types/UrlContents.hs | 19 ++++ .../external_special_remote_protocol.mdwn | 16 ++-- 18 files changed, 115 insertions(+), 77 deletions(-) create mode 100644 Types/UrlContents.hs diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 76095d6e4a..6f14ed861c 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -25,6 +25,7 @@ import Annex.Content import Logs.Web import Types.Key import Types.KeySource +import Types.UrlContents import Config import Annex.Content.Direct import Logs.Location @@ -50,73 +51,70 @@ relaxedOption :: Option relaxedOption = flagOption [] "relaxed" "skip size check" seek :: CommandSeek -seek ps = do - f <- getOptionField fileOption return +seek us = do + optfile <- getOptionField fileOption return relaxed <- getOptionFlag relaxedOption - d <- getOptionField pathdepthOption (return . maybe Nothing readish) - withStrings (start relaxed f d) ps + pathdepth <- getOptionField pathdepthOption (return . maybe Nothing readish) + forM_ us $ \u -> do + r <- Remote.claimingUrl u + if Remote.uuid r == webUUID + then void $ commandAction $ startWeb relaxed optfile pathdepth u + else do + let handlecontents url c = case c of + UrlContents sz mkf -> + void $ commandAction $ + startRemote r relaxed optfile pathdepth url sz mkf + UrlNested l -> + forM_ l $ \(url', c) -> + handlecontents url' c + res <- tryNonAsync $ maybe + (error "unable to checkUrl") + (flip id u) + (Remote.checkUrl r) + case res of + Left e -> void $ commandAction $ do + showStart "addurl" u + warning (show e) + next $ next $ return False + Right c -> handlecontents u c -start :: Bool -> Maybe FilePath -> Maybe Int -> String -> CommandStart -start relaxed optfile pathdepth s = do - r <- Remote.claimingUrl s - if Remote.uuid r == webUUID - then startWeb relaxed optfile pathdepth s - else startRemote r relaxed optfile pathdepth s - -startRemote :: Remote -> Bool -> Maybe FilePath -> Maybe Int -> String -> CommandStart -startRemote r relaxed optfile pathdepth s = do +startRemote :: Remote -> Bool -> Maybe FilePath -> Maybe Int -> String -> Maybe Integer -> (FilePath -> FilePath) -> CommandStart +startRemote r relaxed optfile pathdepth s sz mkf = do url <- case Url.parseURIRelaxed s of Nothing -> error $ "bad uri " ++ s Just u -> pure u pathmax <- liftIO $ fileNameLengthLimit "." - let file = choosefile $ url2file url pathdepth pathmax + let file = mkf $ choosefile $ url2file url pathdepth pathmax showStart "addurl" file showNote $ "using " ++ Remote.name r - next $ performRemote r relaxed s file + next $ performRemote r relaxed s file sz where choosefile = flip fromMaybe optfile -performRemote :: Remote -> Bool -> URLString -> FilePath -> CommandPerform -performRemote r relaxed uri file = ifAnnexed file adduri geturi +performRemote :: Remote -> Bool -> URLString -> FilePath -> Maybe Integer -> CommandPerform +performRemote r relaxed uri file sz = ifAnnexed file adduri geturi where loguri = setDownloader uri OtherDownloader adduri = addUrlChecked relaxed loguri (Remote.uuid r) checkexistssize - checkexistssize key = do - res <- tryNonAsync $ Remote.checkUrl r uri - case res of - Left e -> do - warning (show e) - return (False, False) - Right Nothing -> - return (True, True) - Right (Just sz) -> - return (True, sz == fromMaybe sz (keySize key)) + checkexistssize key = return $ case sz of + Nothing -> (True, True) + Just n -> (True, n == fromMaybe n (keySize key)) geturi = do - dummykey <- Backend.URL.fromUrl uri =<< - if relaxed - then return Nothing - else Remote.checkUrl r uri + urlkey <- Backend.URL.fromUrl uri sz liftIO $ createDirectoryIfMissing True (parentDir file) next $ ifM (Annex.getState Annex.fast <||> pure relaxed) ( do - res <- tryNonAsync $ Remote.checkUrl r uri - case res of - Left e -> do - warning (show e) - return False - Right size -> do - key <- Backend.URL.fromUrl uri size - cleanup (Remote.uuid r) loguri file key Nothing - return True + cleanup (Remote.uuid r) loguri file urlkey Nothing + return True , do - -- Set temporary url for the dummy key + -- Set temporary url for the urlkey -- so that the remote knows what url it -- should use to download it. - setTempUrl dummykey uri - let downloader = Remote.retrieveKeyFile r dummykey (Just file) + setTempUrl urlkey uri + let downloader = Remote.retrieveKeyFile r urlkey (Just file) ok <- isJust <$> - downloadWith downloader dummykey (Remote.uuid r) loguri file - removeTempUrl dummykey + downloadWith downloader urlkey (Remote.uuid r) loguri file + removeTempUrl urlkey return ok ) diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 405ce3056c..16f73a66f8 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -75,7 +75,7 @@ gen r u c gc = do , mkUnavailable = return Nothing , getInfo = return [("repo", buprepo)] , claimUrl = Nothing - , checkUrl = const $ return Nothing + , checkUrl = Nothing } return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store this buprepo) diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index 1b8003dd8a..f771930518 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -72,7 +72,7 @@ gen r u c gc = do , mkUnavailable = return Nothing , getInfo = return [("repo", ddarrepo)] , claimUrl = Nothing - , checkUrl = const $ return Nothing + , checkUrl = Nothing } ddarrepo = fromMaybe (error "missing ddarrepo") $ remoteAnnexDdarRepo gc specialcfg = (specialRemoteCfg c) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index fec40baa81..b798ff07c1 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -70,7 +70,7 @@ gen r u c gc = do gc { remoteAnnexDirectory = Just "/dev/null" }, getInfo = return [("directory", dir)], claimUrl = Nothing, - checkUrl = const $ return Nothing + checkUrl = Nothing } where dir = fromMaybe (error "missing directory") $ remoteAnnexDirectory gc diff --git a/Remote/External.hs b/Remote/External.hs index 62671755c0..c5330f7ea6 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -12,6 +12,7 @@ import qualified Annex import Common.Annex import Types.Remote import Types.CleanupActions +import Types.UrlContents import qualified Git import Config import Remote.Helper.Special @@ -71,7 +72,7 @@ gen r u c gc = do gc { remoteAnnexExternalType = Just "!dne!" }, getInfo = return [("externaltype", externaltype)], claimUrl = Just (claimurl external), - checkUrl = checkurl external + checkUrl = Just (checkurl external) } where externaltype = fromMaybe (error "missing externaltype") (remoteAnnexExternalType gc) @@ -429,11 +430,14 @@ claimurl external url = UNSUPPORTED_REQUEST -> Just $ return False _ -> Nothing -checkurl :: External -> URLString -> Annex (Maybe Integer) +checkurl :: External -> URLString -> Annex UrlContents checkurl external url = handleRequest external (CHECKURL url) Nothing $ \req -> case req of - CHECKURL_SIZE sz -> Just $ return $ Just sz - CHECKURL_SIZEUNKNOWN -> Just $ return Nothing + CHECKURL_CONTENTS sz f -> Just $ return $ UrlContents sz + (if null f then id else const f) + CHECKURL_MULTI l -> Just $ return $ UrlNested $ map mknested l CHECKURL_FAILURE errmsg -> Just $ error errmsg UNSUPPORTED_REQUEST -> error "CHECKURL not implemented by external special remote" _ -> Nothing + where + mknested (url', sz, f) = (url', UrlContents sz (const f)) diff --git a/Remote/External/Types.hs b/Remote/External/Types.hs index b00352702b..73177d3169 100644 --- a/Remote/External/Types.hs +++ b/Remote/External/Types.hs @@ -137,8 +137,8 @@ data Response | INITREMOTE_FAILURE ErrorMsg | CLAIMURL_SUCCESS | CLAIMURL_FAILURE - | CHECKURL_SIZE Size - | CHECKURL_SIZEUNKNOWN + | CHECKURL_CONTENTS Size FilePath + | CHECKURL_MULTI [(URLString, Size, FilePath)] | CHECKURL_FAILURE ErrorMsg | UNSUPPORTED_REQUEST deriving (Show) @@ -159,8 +159,8 @@ instance Proto.Receivable Response where parseCommand "INITREMOTE-FAILURE" = Proto.parse1 INITREMOTE_FAILURE parseCommand "CLAIMURL-SUCCESS" = Proto.parse0 CLAIMURL_SUCCESS parseCommand "CLAIMURL-FAILURE" = Proto.parse0 CLAIMURL_FAILURE - parseCommand "CHECKURL-SIZE" = Proto.parse1 CHECKURL_SIZE - parseCommand "CHECKURL-SIZEUNKNOWN" = Proto.parse0 CHECKURL_SIZEUNKNOWN + parseCommand "CHECKURL-CONTENTS" = Proto.parse2 CHECKURL_CONTENTS + parseCommand "CHECKURL-MULTI" = Proto.parse1 CHECKURL_MULTI parseCommand "CHECKURL-FAILURE" = Proto.parse1 CHECKURL_FAILURE parseCommand "UNSUPPORTED-REQUEST" = Proto.parse0 UNSUPPORTED_REQUEST parseCommand _ = Proto.parseFail @@ -233,7 +233,7 @@ instance Proto.Receivable AsyncMessage where type ErrorMsg = String type Setting = String type ProtocolVersion = Int -type Size = Integer +type Size = Maybe Integer supportedProtocolVersions :: [ProtocolVersion] supportedProtocolVersions = [1] @@ -263,8 +263,10 @@ instance Proto.Serializable Cost where deserialize = readish instance Proto.Serializable Size where - serialize = show - deserialize = readish + serialize (Just s) = show s + serialize Nothing = "UNKNOWN" + deserialize "UNKNOWN" = Just Nothing + deserialize s = maybe Nothing (Just . Just) (readish s) instance Proto.Serializable Availability where serialize GloballyAvailable = "GLOBAL" @@ -277,3 +279,12 @@ instance Proto.Serializable Availability where instance Proto.Serializable BytesProcessed where serialize (BytesProcessed n) = show n deserialize = BytesProcessed <$$> readish + +instance Proto.Serializable [(URLString, Size, FilePath)] where + serialize = unwords . map go + where + go (url, sz, f) = url ++ " " ++ maybe "UNKNOWN" show sz ++ " " ++ f + deserialize = Just . go [] . words + where + go c (url:sz:f:rest) = go ((url, readish sz, f):c) rest + go c _ = reverse c diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 6bf99c1351..2f2ddc9f3a 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -123,7 +123,7 @@ gen' r u c gc = do , mkUnavailable = return Nothing , getInfo = return $ gitRepoInfo r , claimUrl = Nothing - , checkUrl = const $ return Nothing + , checkUrl = Nothing } return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store this rsyncopts) diff --git a/Remote/Git.hs b/Remote/Git.hs index 74fb81965a..04823949c0 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -161,7 +161,7 @@ gen r u c gc , mkUnavailable = unavailable r u c gc , getInfo = return $ gitRepoInfo r , claimUrl = Nothing - , checkUrl = const $ return Nothing + , checkUrl = Nothing } unavailable :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 17f7550007..80329b9a99 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -70,7 +70,7 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost getInfo = includeCredsInfo c (AWS.creds u) $ [ ("glacier vault", getVault c) ], claimUrl = Nothing, - checkUrl = const $ return Nothing + checkUrl = Nothing } specialcfg = (specialRemoteCfg c) -- Disabled until jobList gets support for chunks. diff --git a/Remote/Hook.hs b/Remote/Hook.hs index 09297a6e2c..d0b5f7932c 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -63,7 +63,7 @@ gen r u c gc = do gc { remoteAnnexHookType = Just "!dne!" }, getInfo = return [("hooktype", hooktype)], claimUrl = Nothing, - checkUrl = const $ return Nothing + checkUrl = Nothing } where hooktype = fromMaybe (error "missing hooktype") $ remoteAnnexHookType gc diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 7a7f68165a..ad5b77d38f 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -85,7 +85,7 @@ gen r u c gc = do , mkUnavailable = return Nothing , getInfo = return [("url", url)] , claimUrl = Nothing - , checkUrl = const $ return Nothing + , checkUrl = Nothing } where specialcfg = (specialRemoteCfg c) diff --git a/Remote/S3.hs b/Remote/S3.hs index f569047294..e0d441292c 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -94,7 +94,7 @@ gen r u c gc = do , Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c)) ], claimUrl = Nothing, - checkUrl = const $ return Nothing + checkUrl = Nothing } s3Setup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) diff --git a/Remote/Tahoe.hs b/Remote/Tahoe.hs index 8b56bbd502..ac7088bea1 100644 --- a/Remote/Tahoe.hs +++ b/Remote/Tahoe.hs @@ -87,7 +87,7 @@ gen r u c gc = do mkUnavailable = return Nothing, getInfo = return [], claimUrl = Nothing, - checkUrl = const $ return Nothing + checkUrl = Nothing } tahoeSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) diff --git a/Remote/Web.hs b/Remote/Web.hs index 3845dddf53..639eb7e3b0 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -65,7 +65,7 @@ gen r _ c gc = mkUnavailable = return Nothing, getInfo = return [], claimUrl = Nothing, -- implicitly claims all urls - checkUrl = const $ return Nothing + checkUrl = Nothing } downloadKey :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 57e1dd7856..27a87a89cf 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -75,7 +75,7 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost getInfo = includeCredsInfo c (davCreds u) $ [("url", fromMaybe "unknown" (M.lookup "url" c))], claimUrl = Nothing, - checkUrl = const $ return Nothing + checkUrl = Nothing } chunkconfig = getChunkConfig c diff --git a/Types/Remote.hs b/Types/Remote.hs index baa857906d..4d17abf955 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -25,6 +25,7 @@ import Types.UUID import Types.GitConfig import Types.Availability import Types.Creds +import Types.UrlContents import Config.Cost import Utility.Metered import Git.Types @@ -104,11 +105,10 @@ data RemoteA a = Remote { getInfo :: a [(String, String)], -- Some remotes can download from an url (or uri). claimUrl :: Maybe (URLString -> a Bool), - -- Checks that the url is accessible, and gets the size of its - -- content. Returns Nothing if the url is accessible, but - -- its size cannot be determined inexpensively. + -- Checks that the url is accessible, and gets information about + -- its contents, without downloading the full content. -- Throws an exception if the url is inaccessible. - checkUrl :: URLString -> a (Maybe Integer) + checkUrl :: Maybe (URLString -> a UrlContents) } instance Show (RemoteA a) where diff --git a/Types/UrlContents.hs b/Types/UrlContents.hs new file mode 100644 index 0000000000..81b195fe3f --- /dev/null +++ b/Types/UrlContents.hs @@ -0,0 +1,19 @@ +{- git-annex URL contents + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Types.UrlContents where + +import Utility.Url + +data UrlContents + -- An URL contains a file, whose size may be known. + -- A default filename will be provided, and can be overridded + -- or built on. + = UrlContents (Maybe Integer) (FilePath -> FilePath) + -- Sometimes an URL points to multiple files, each accessible + -- by their own URL. + | UrlNested [(URLString, UrlContents)] diff --git a/doc/design/external_special_remote_protocol.mdwn b/doc/design/external_special_remote_protocol.mdwn index 072c5a1a2c..76d25bf08d 100644 --- a/doc/design/external_special_remote_protocol.mdwn +++ b/doc/design/external_special_remote_protocol.mdwn @@ -181,12 +181,18 @@ while it's handling a request. Indicates that the CLAIMURL url will be handled by this remote. * `CLAIMURL-FAILURE` Indicates that the CLAIMURL url wil not be handled by this remote. -* `CHECKURL-SIZE Size` +* `CHECKURL-CONTENTS Size|UNKNOWN Filename` + Indicates that the requested url has been verified to exist. + The Size is the size in bytes, or use "UNKNOWN" if the size could not be + determined. + The Filename can be empty (in which case a default is used), + or can specify a filename that is suggested to be used for this url. +* `CHECKURL-MULTI Url Size|UNKNOWN Filename ...` Indicates that the requested url has been verified to exist, - and its size is known. The size is in bytes. -* `CHECKURL-SIZEUNKNOWN` - Indicates that the requested url has been verified to exist, - but its size could not be determined. + and contains multiple files, which can each be accessed using + their own url. + Note that since a list is returned, neither the Url nor the Filename + can contain spaces. * `CHECKURL-FAILURE` Indicates that the requested url could not be accessed. * `UNSUPPORTED-REQUEST`