addurl: When a Content-Disposition header suggests a filename to use, addurl will consider using it, if it's reasonable and doesn't conflict with an existing file. (--file overrides this)
parent 91f1b2bdcf
commit 587f6a919b
7 changed files with 113 additions and 72 deletions
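In code terms, the selection order is: --file if given, then a usable
Content-Disposition name, then the url-derived name. A condensed,
hypothetical sketch of that decision (not code from the patch; the real
version in the Command/AddUrl.hs hunks below also sanitizes and
length-truncates the suggested name first):

    import System.Directory (doesDirectoryExist, doesFileExist)

    -- Pick the filename addurl will use, in the new priority order.
    chooseFilename
        :: Maybe FilePath  -- --file, if given (always wins)
        -> Maybe FilePath  -- Content-Disposition suggestion, if any
        -> FilePath        -- fallback name derived from the url
        -> IO FilePath
    chooseFilename (Just f) _ _ = return f
    chooseFilename Nothing Nothing urlderived = return urlderived
    chooseFilename Nothing (Just suggested) urlderived = do
        -- a suggestion that collides with an existing file or
        -- directory is ignored rather than overwritten
        conflict <- (||) <$> doesFileExist suggested
            <*> doesDirectoryExist suggested
        return $ if conflict then urlderived else suggested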
@@ -96,7 +96,7 @@ newAssistantUrl repo = do
  - warp-tls listens to http, in order to show an error page, so this works.
  -}
 assistantListening :: URLString -> IO Bool
-assistantListening url = catchBoolIO $ fst <$> exists url' def
+assistantListening url = catchBoolIO $ exists url' def
   where
     url' = case parseURI url of
        Nothing -> url
@@ -191,7 +191,7 @@ escapeHeader = escapeURIString (\c -> isUnescapedInURI c && c /= ' ')
 getRepoInfo :: RemoteConfig -> Widget
 getRepoInfo c = do
     uo <- liftAnnex Url.getUrlOptions
-    exists <- liftIO $ catchDefaultIO False $ fst <$> Url.exists url uo
+    exists <- liftIO $ catchDefaultIO False $ Url.exists url uo
     [whamlet|
 <a href="#{url}">
   Internet Archive item
@@ -120,17 +120,16 @@ downloadRemoteFile r relaxed uri file sz = do
     loguri = setDownloader uri OtherDownloader
 
 startWeb :: Bool -> Maybe FilePath -> Maybe Int -> String -> CommandStart
-startWeb relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s
+startWeb relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI urlstring
   where
-    (s', downloader) = getDownloader s
-    bad = fromMaybe (error $ "bad url " ++ s') $
-        parseURI $ escapeURIString isUnescapedInURI s'
-    choosefile = flip fromMaybe optfile
+    (urlstring, downloader) = getDownloader s
+    bad = fromMaybe (error $ "bad url " ++ urlstring) $
+        parseURI $ escapeURIString isUnescapedInURI urlstring
     go url = case downloader of
        QuviDownloader -> usequvi
        _ ->
 #ifdef WITH_QUVI
-           ifM (quviSupported s')
+           ifM (quviSupported urlstring)
                ( usequvi
                , regulardownload url
                )
@@ -139,30 +138,44 @@ startWeb relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s
 #endif
     regulardownload url = do
        pathmax <- liftIO $ fileNameLengthLimit "."
-       let file = choosefile $ url2file url pathdepth pathmax
+       urlinfo <- if relaxed
+           then pure $ Url.UrlInfo True Nothing Nothing
+           else Url.withUrlOptions (Url.getUrlInfo urlstring)
+       file <- case optfile of
+           Just f -> pure f
+           Nothing -> case Url.urlSuggestedFile urlinfo of
+               Nothing -> pure $ url2file url pathdepth pathmax
+               Just sf -> do
+                   let f = truncateFilePath pathmax $
+                           sanitizeFilePath sf
+                   ifM (liftIO $ doesFileExist f <||> doesDirectoryExist f)
+                       ( pure $ url2file url pathdepth pathmax
+                       , pure f
+                       )
        showStart "addurl" file
-       next $ performWeb relaxed s' file
+       next $ performWeb relaxed urlstring file urlinfo
 #ifdef WITH_QUVI
-    badquvi = error $ "quvi does not know how to download url " ++ s'
+    badquvi = error $ "quvi does not know how to download url " ++ urlstring
     usequvi = do
        page <- fromMaybe badquvi
-           <$> withQuviOptions Quvi.forceQuery [Quvi.quiet, Quvi.httponly] s'
+           <$> withQuviOptions Quvi.forceQuery [Quvi.quiet, Quvi.httponly] urlstring
        let link = fromMaybe badquvi $ headMaybe $ Quvi.pageLinks page
        pathmax <- liftIO $ fileNameLengthLimit "."
-       let file = choosefile $ truncateFilePath pathmax $ sanitizeFilePath $
-           Quvi.pageTitle page ++ "." ++ Quvi.linkSuffix link
+       let file = flip fromMaybe optfile $
+           truncateFilePath pathmax $ sanitizeFilePath $
+           Quvi.pageTitle page ++ "." ++ Quvi.linkSuffix link
        showStart "addurl" file
-       next $ performQuvi relaxed s' (Quvi.linkUrl link) file
+       next $ performQuvi relaxed urlstring (Quvi.linkUrl link) file
 #else
     usequvi = error "not built with quvi support"
 #endif
 
-performWeb :: Bool -> URLString -> FilePath -> CommandPerform
-performWeb relaxed url file = ifAnnexed file addurl geturl
+performWeb :: Bool -> URLString -> FilePath -> Url.UrlInfo -> CommandPerform
+performWeb relaxed url file urlinfo = ifAnnexed file addurl geturl
   where
-    geturl = next $ isJust <$> addUrlFile relaxed url file
-    addurl = addUrlChecked relaxed url webUUID checkexistssize
-    checkexistssize = Url.withUrlOptions . Url.check url . keySize
+    geturl = next $ isJust <$> addUrlFile relaxed url urlinfo file
+    addurl = addUrlChecked relaxed url webUUID $ \k -> return $
+       (Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k)
 
 #ifdef WITH_QUVI
 performQuvi :: Bool -> URLString -> URLString -> FilePath -> CommandPerform
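A detail worth noting in regulardownload above: with --relaxed (or
annex.fast) the url is never probed at all; an optimistic Url.UrlInfo is
fabricated instead, which also means no Content-Disposition suggestion
can exist on that path. A sketch of what that record means (UrlInfo as
defined in the Utility/Url.hs hunks further down; relaxedInfo is a
hypothetical name):

    import Utility.Url (UrlInfo(..))

    relaxedInfo :: UrlInfo
    relaxedInfo = UrlInfo
        True     -- urlExists: taken on faith, nothing is checked
        Nothing  -- urlSize: unknown, so the key records no size
        Nothing  -- urlSuggestedFile: none, so the url-derived name is used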
@@ -189,7 +202,8 @@ addUrlFileQuvi relaxed quviurl videourl file = do
      - retained, because the size of a video stream
      - might change and we want to be able to download
      - it later. -}
-    sizedkey <- addSizeUrlKey videourl key
+    urlinfo <- Url.withUrlOptions (Url.getUrlInfo videourl)
+    let sizedkey = addSizeUrlKey urlinfo key
     prepGetViaTmpChecked sizedkey Nothing $ do
        tmp <- fromRepo $ gitAnnexTmpObjectLocation key
        showOutput
@@ -225,17 +239,17 @@ addUrlChecked relaxed url u checkexistssize key
            stop
        )
 
-addUrlFile :: Bool -> URLString -> FilePath -> Annex (Maybe Key)
-addUrlFile relaxed url file = do
+addUrlFile :: Bool -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
+addUrlFile relaxed url urlinfo file = do
     liftIO $ createDirectoryIfMissing True (parentDir file)
     ifM (Annex.getState Annex.fast <||> pure relaxed)
-       ( nodownload relaxed url file
-       , downloadWeb url file
+       ( nodownload relaxed url urlinfo file
+       , downloadWeb url urlinfo file
        )
 
-downloadWeb :: URLString -> FilePath -> Annex (Maybe Key)
-downloadWeb url file = do
-    dummykey <- addSizeUrlKey url =<< Backend.URL.fromUrl url Nothing
+downloadWeb :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
+downloadWeb url urlinfo file = do
+    dummykey <- addSizeUrlKey urlinfo <$> Backend.URL.fromUrl url Nothing
     let downloader f _ = do
        showOutput
        downloadUrl [url] f
@@ -272,15 +286,9 @@ downloadWith downloader dummykey u url file =
        liftIO $ createDirectoryIfMissing True (parentDir tmp)
        downloader tmp p
 
-{- Hits the url to get the size, if available.
- -
- - This is needed to avoid exceeding the diskreserve when downloading,
- - and so the assistant can display a pretty progress bar.
- -}
-addSizeUrlKey :: URLString -> Key -> Annex Key
-addSizeUrlKey url key = do
-    size <- snd <$> Url.withUrlOptions (Url.exists url)
-    return $ key { keySize = size }
+{- Adds the url size to the Key. -}
+addSizeUrlKey :: Url.UrlInfo -> Key -> Key
+addSizeUrlKey urlinfo key = key { keySize = Url.urlSize urlinfo }
 
 cleanup :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex ()
 cleanup u url file key mtmp = do
@@ -295,19 +303,15 @@ cleanup u url file key mtmp = do
     Annex.Queue.flush
     maybe noop (moveAnnex key) mtmp
 
-nodownload :: Bool -> URLString -> FilePath -> Annex (Maybe Key)
-nodownload relaxed url file = do
-    (exists, size) <- if relaxed
-       then pure (True, Nothing)
-       else Url.withUrlOptions (Url.exists url)
-    if exists
-       then do
-           key <- Backend.URL.fromUrl url size
-           cleanup webUUID url file key Nothing
-           return (Just key)
-       else do
-           warning $ "unable to access url: " ++ url
-           return Nothing
+nodownload :: Bool -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
+nodownload relaxed url urlinfo file
+    | Url.urlExists urlinfo = do
+       key <- Backend.URL.fromUrl url (Url.urlSize urlinfo)
+       cleanup webUUID url file key Nothing
+       return (Just key)
+    | otherwise = do
+       warning $ "unable to access url: " ++ url
+       return Nothing
 
 url2file :: URI -> Maybe Int -> Int -> FilePath
 url2file url pathdepth pathmax = case pathdepth of
@@ -144,7 +144,9 @@ performDownload relaxed cache todownload = case location todownload of
        rundownload url (takeExtension url) $ \f -> do
            r <- Remote.claimingUrl url
            if Remote.uuid r == webUUID
-               then maybeToList <$> addUrlFile relaxed url f
+               then do
+                   urlinfo <- Url.withUrlOptions (Url.getUrlInfo url)
+                   maybeToList <$> addUrlFile relaxed url urlinfo f
                else do
                    res <- tryNonAsync $ maybe
                        (error $ "unable to checkUrl of " ++ Remote.name r)
@@ -17,6 +17,8 @@ module Utility.Url (
     check,
     checkBoth,
     exists,
+    UrlInfo(..),
+    getUrlInfo,
     download,
     downloadQuiet,
     parseURIRelaxed
@@ -84,18 +86,27 @@ checkBoth url expected_size uo = do
     v <- check url expected_size uo
     return (fst v && snd v)
 check :: URLString -> Maybe Integer -> UrlOptions -> IO (Bool, Bool)
-check url expected_size = go <$$> exists url
+check url expected_size = go <$$> getUrlInfo url
   where
-    go (False, _) = (False, False)
-    go (True, Nothing) = (True, True)
-    go (True, s) = case expected_size of
+    go (UrlInfo False _ _) = (False, False)
+    go (UrlInfo True Nothing _) = (True, True)
+    go (UrlInfo True s _) = case expected_size of
        Just _ -> (True, expected_size == s)
        Nothing -> (True, True)
 
+exists :: URLString -> UrlOptions -> IO Bool
+exists url uo = urlExists <$> getUrlInfo url uo
+
+data UrlInfo = UrlInfo
+    { urlExists :: Bool
+    , urlSize :: Maybe Integer
+    , urlSuggestedFile :: Maybe FilePath
+    }
+
 {- Checks that an url exists and could be successfully downloaded,
- - also returning its size if available. -}
-exists :: URLString -> UrlOptions -> IO (Bool, Maybe Integer)
-exists url uo = case parseURIRelaxed url of
+ - also returning its size and suggested filename if available. -}
+getUrlInfo :: URLString -> UrlOptions -> IO UrlInfo
+getUrlInfo url uo = case parseURIRelaxed url of
     Just u -> case parseUrl (show u) of
        Just req -> existsconduit req `catchNonAsync` const dne
        -- http-conduit does not support file:, ftp:, etc urls,
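A quick usage sketch for the new UrlInfo API (assumes data-default's def
for UrlOptions, the same shortcut the assistantListening hunk at the top
uses; printInfo itself is hypothetical):

    import Data.Default (def)
    import Utility.Url

    printInfo :: URLString -> IO ()
    printInfo u = do
        i <- getUrlInfo u def
        if urlExists i
            then putStrLn $ concat
                [ "size: ", maybe "unknown" show (urlSize i)
                , ", suggested filename: "
                , maybe "(none)" id (urlSuggestedFile i)
                ]
            else putStrLn $ "unable to access url: " ++ u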
@@ -107,18 +118,21 @@ exists url uo = case parseURIRelaxed url of
            case s of
                Just stat -> do
                    sz <- getFileSize' f stat
-                   return (True, Just sz)
+                   found (Just sz) Nothing
                Nothing -> dne
        | Build.SysConfig.curl -> do
            output <- catchDefaultIO "" $
                readProcess "curl" $ toCommand curlparams
            case lastMaybe (lines output) of
-               Just ('2':_:_) -> return (True, extractlencurl output)
+               Just ('2':_:_) -> found
+                   (extractlencurl output)
+                   Nothing
                _ -> dne
        | otherwise -> dne
     Nothing -> dne
   where
-    dne = return (False, Nothing)
+    dne = return $ UrlInfo False Nothing Nothing
+    found sz f = return $ UrlInfo True sz f
 
     curlparams = addUserAgent uo $
        [ Param "-s"
@@ -133,23 +147,36 @@ exists url uo = case parseURIRelaxed url of
            _ -> Nothing
        _ -> Nothing
 
-    extractlen resp = readish . B8.toString =<< headMaybe lenheaders
-      where
-        lenheaders = map snd $
-            filter (\(h, _) -> h == hContentLength)
-                (responseHeaders resp)
+    extractlen = readish . B8.toString <=< firstheader hContentLength
+
+    extractfilename = contentDispositionFilename . B8.toString
+        <=< firstheader hContentDisposition
+
+    firstheader h = headMaybe . map snd .
+        filter (\p -> fst p == h) . responseHeaders
 
     existsconduit req = withManager $ \mgr -> do
        let req' = headRequest (applyRequest uo req)
        resp <- http req' mgr
        -- forces processing the response before the
        -- manager is closed
-       ret <- if responseStatus resp == ok200
-           then return (True, extractlen resp)
-           else liftIO dne
+       ret <- liftIO $ if responseStatus resp == ok200
+           then found
+               (extractlen resp)
+               (extractfilename resp)
+           else dne
        liftIO $ closeManager mgr
        return ret
 
+-- Parse eg: attachment; filename="fname.ext"
+-- per RFC 2616
+contentDispositionFilename :: String -> Maybe FilePath
+contentDispositionFilename s
+    | "attachment; filename=\"" `isPrefixOf` s && "\"" `isSuffixOf` s =
+       Just $ reverse $ drop 1 $ reverse $
+           drop 1 $ dropWhile (/= '"') s
+    | otherwise = Nothing
+
 #if MIN_VERSION_http_conduit(2,0,0)
 headRequest :: Request -> Request
 #else
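The parser is deliberately strict: only the exact quoted attachment form
from RFC 2616 is accepted, so a malformed or unusual header simply yields
no suggestion and addurl falls back to the url-derived filename. A few
illustrative cases (expected output in comments; this test is not part of
the patch):

    main :: IO ()
    main = mapM_ (print . contentDispositionFilename)
        [ "attachment; filename=\"fname.ext\""  -- Just "fname.ext"
        , "inline; filename=\"fname.ext\""      -- Nothing: not the attachment form
        , "attachment; filename=fname.ext"      -- Nothing: unquoted filename
        ]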
@@ -229,6 +256,9 @@ parseURIRelaxed = parseURI . escapeURIString isAllowedInURI
 hAcceptEncoding :: CI.CI B.ByteString
 hAcceptEncoding = "Accept-Encoding"
 
+hContentDisposition :: CI.CI B.ByteString
+hContentDisposition = "Content-Disposition"
+
 #if ! MIN_VERSION_http_types(0,7,0)
 hContentLength :: CI.CI B.ByteString
 hContentLength = "Content-Length"
debian/changelog
@@ -16,6 +16,9 @@ git-annex (5.20150114) UNRELEASED; urgency=medium
   * Avoid using fileSize which maxes out at just 2 gb on Windows.
     Instead, use hFileSize, which doesn't have a bounded size.
     Fixes support for files > 2 gb on Windows.
+  * addurl: When a Content-Disposition header suggests a filename to use,
+    addurl will consider using it, if it's reasonable and doesn't conflict
+    with an existing file. (--file overrides this)
 
  -- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400
 
@@ -218,8 +218,10 @@ subdirectories).
   is there at a future point, specify `--relaxed`. (Implies `--fast`.)
 
   Normally the filename is based on the full url, so will look like
-  "www.example.com_dir_subdir_bigfile". For a shorter filename, specify
-  `--pathdepth=N`. For example, `--pathdepth=1` will use "dir/subdir/bigfile",
+  "www.example.com_dir_subdir_bigfile". In some cases, addurl is able to
+  come up with a better filename based on other information. Or, for a
+  shorter filename, specify `--pathdepth=N`. For example,
+  `--pathdepth=1` will use "dir/subdir/bigfile",
   while `--pathdepth=3` will use "bigfile". It can also be negative;
   `--pathdepth=-2` will use the last two parts of the url.
 