addurl: When a Content-Disposition header suggests a filename to use, addurl will consider using it, if it's reasonable and doesn't conflict with an existing file. (--file overrides this)
parent 91f1b2bdcf
commit 587f6a919b
7 changed files with 113 additions and 72 deletions
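The behavior hinges on an HTTP response header such as
Content-Disposition: attachment; filename="bigfile.tar.gz". As a rough sketch
of the policy the diff below implements in startWeb (the helper name is
hypothetical, and the sanitizeFilePath/truncateFilePath steps are elided):

    import System.Directory (doesDirectoryExist, doesFileExist)

    -- --file always wins; otherwise a suggested filename is used only
    -- when nothing by that name already exists; else fall back to the
    -- name derived from the url.
    chooseDest :: Maybe FilePath -> Maybe FilePath -> FilePath -> IO FilePath
    chooseDest (Just optfile) _ _ = return optfile
    chooseDest Nothing Nothing fallback = return fallback
    chooseDest Nothing (Just suggested) fallback = do
        taken <- (||) <$> doesFileExist suggested
                      <*> doesDirectoryExist suggested
        return (if taken then fallback else suggested)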
@@ -96,7 +96,7 @@ newAssistantUrl repo = do
  - warp-tls listens to http, in order to show an error page, so this works.
  -}
 assistantListening :: URLString -> IO Bool
-assistantListening url = catchBoolIO $ fst <$> exists url' def
+assistantListening url = catchBoolIO $ exists url' def
   where
 	url' = case parseURI url of
 		Nothing -> url
@@ -191,7 +191,7 @@ escapeHeader = escapeURIString (\c -> isUnescapedInURI c && c /= ' ')
 getRepoInfo :: RemoteConfig -> Widget
 getRepoInfo c = do
 	uo <- liftAnnex Url.getUrlOptions
-	exists <- liftIO $ catchDefaultIO False $ fst <$> Url.exists url uo
+	exists <- liftIO $ catchDefaultIO False $ Url.exists url uo
 	[whamlet|
 		<a href="#{url}">
 			Internet Archive item
@@ -120,17 +120,16 @@ downloadRemoteFile r relaxed uri file sz = do
 	loguri = setDownloader uri OtherDownloader
 
 startWeb :: Bool -> Maybe FilePath -> Maybe Int -> String -> CommandStart
-startWeb relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s
+startWeb relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI urlstring
   where
-	(s', downloader) = getDownloader s
-	bad = fromMaybe (error $ "bad url " ++ s') $
-		parseURI $ escapeURIString isUnescapedInURI s'
-	choosefile = flip fromMaybe optfile
+	(urlstring, downloader) = getDownloader s
+	bad = fromMaybe (error $ "bad url " ++ urlstring) $
+		parseURI $ escapeURIString isUnescapedInURI urlstring
 	go url = case downloader of
 		QuviDownloader -> usequvi
 		_ ->
 #ifdef WITH_QUVI
-			ifM (quviSupported s')
+			ifM (quviSupported urlstring)
 				( usequvi
 				, regulardownload url
 				)
@@ -139,30 +138,44 @@ startWeb relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s
 #endif
 	regulardownload url = do
 		pathmax <- liftIO $ fileNameLengthLimit "."
-		let file = choosefile $ url2file url pathdepth pathmax
+		urlinfo <- if relaxed
+			then pure $ Url.UrlInfo True Nothing Nothing
+			else Url.withUrlOptions (Url.getUrlInfo urlstring)
+		file <- case optfile of
+			Just f -> pure f
+			Nothing -> case Url.urlSuggestedFile urlinfo of
+				Nothing -> pure $ url2file url pathdepth pathmax
+				Just sf -> do
+					let f = truncateFilePath pathmax $
+						sanitizeFilePath sf
+					ifM (liftIO $ doesFileExist f <||> doesDirectoryExist f)
+						( pure $ url2file url pathdepth pathmax
+						, pure f
+						)
 		showStart "addurl" file
-		next $ performWeb relaxed s' file
+		next $ performWeb relaxed urlstring file urlinfo
 #ifdef WITH_QUVI
-	badquvi = error $ "quvi does not know how to download url " ++ s'
+	badquvi = error $ "quvi does not know how to download url " ++ urlstring
 	usequvi = do
 		page <- fromMaybe badquvi
-			<$> withQuviOptions Quvi.forceQuery [Quvi.quiet, Quvi.httponly] s'
+			<$> withQuviOptions Quvi.forceQuery [Quvi.quiet, Quvi.httponly] urlstring
 		let link = fromMaybe badquvi $ headMaybe $ Quvi.pageLinks page
 		pathmax <- liftIO $ fileNameLengthLimit "."
-		let file = choosefile $ truncateFilePath pathmax $ sanitizeFilePath $
+		let file = flip fromMaybe optfile $
+			truncateFilePath pathmax $ sanitizeFilePath $
 			Quvi.pageTitle page ++ "." ++ Quvi.linkSuffix link
 		showStart "addurl" file
-		next $ performQuvi relaxed s' (Quvi.linkUrl link) file
+		next $ performQuvi relaxed urlstring (Quvi.linkUrl link) file
 #else
 	usequvi = error "not built with quvi support"
 #endif
 
-performWeb :: Bool -> URLString -> FilePath -> CommandPerform
-performWeb relaxed url file = ifAnnexed file addurl geturl
+performWeb :: Bool -> URLString -> FilePath -> Url.UrlInfo -> CommandPerform
+performWeb relaxed url file urlinfo = ifAnnexed file addurl geturl
   where
-	geturl = next $ isJust <$> addUrlFile relaxed url file
-	addurl = addUrlChecked relaxed url webUUID checkexistssize
-	checkexistssize = Url.withUrlOptions . Url.check url . keySize
+	geturl = next $ isJust <$> addUrlFile relaxed url urlinfo file
+	addurl = addUrlChecked relaxed url webUUID $ \k -> return $
+		(Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k)
 
 #ifdef WITH_QUVI
 performQuvi :: Bool -> URLString -> URLString -> FilePath -> CommandPerform
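Note the shape of the checker that performWeb now hands to addUrlChecked:
rather than issuing a fresh Url.check request per key, it answers from the
UrlInfo fetched once in startWeb. A minimal sketch of that pair (field names
are from the diff, assuming import qualified Utility.Url as Url; the function
name is hypothetical):

    -- (url still exists, recorded key size matches what the server reported)
    existsAndSizeOk :: Url.UrlInfo -> Maybe Integer -> (Bool, Bool)
    existsAndSizeOk urlinfo keysize =
        (Url.urlExists urlinfo, Url.urlSize urlinfo == keysize)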
@@ -189,7 +202,8 @@ addUrlFileQuvi relaxed quviurl videourl file = do
 	 - retained, because the size of a video stream
 	 - might change and we want to be able to download
 	 - it later. -}
-	sizedkey <- addSizeUrlKey videourl key
+	urlinfo <- Url.withUrlOptions (Url.getUrlInfo videourl)
+	let sizedkey = addSizeUrlKey urlinfo key
 	prepGetViaTmpChecked sizedkey Nothing $ do
 		tmp <- fromRepo $ gitAnnexTmpObjectLocation key
 		showOutput
@@ -225,17 +239,17 @@ addUrlChecked relaxed url u checkexistssize key
 			stop
 		)
 
-addUrlFile :: Bool -> URLString -> FilePath -> Annex (Maybe Key)
-addUrlFile relaxed url file = do
+addUrlFile :: Bool -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
+addUrlFile relaxed url urlinfo file = do
 	liftIO $ createDirectoryIfMissing True (parentDir file)
 	ifM (Annex.getState Annex.fast <||> pure relaxed)
-		( nodownload relaxed url file
-		, downloadWeb url file
+		( nodownload relaxed url urlinfo file
+		, downloadWeb url urlinfo file
 		)
 
-downloadWeb :: URLString -> FilePath -> Annex (Maybe Key)
-downloadWeb url file = do
-	dummykey <- addSizeUrlKey url =<< Backend.URL.fromUrl url Nothing
+downloadWeb :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
+downloadWeb url urlinfo file = do
+	dummykey <- addSizeUrlKey urlinfo <$> Backend.URL.fromUrl url Nothing
 	let downloader f _ = do
 		showOutput
 		downloadUrl [url] f
@@ -272,15 +286,9 @@ downloadWith downloader dummykey u url file =
 		liftIO $ createDirectoryIfMissing True (parentDir tmp)
 		downloader tmp p
 
-{- Hits the url to get the size, if available.
- -
- - This is needed to avoid exceeding the diskreserve when downloading,
- - and so the assistant can display a pretty progress bar.
- -}
-addSizeUrlKey :: URLString -> Key -> Annex Key
-addSizeUrlKey url key = do
-	size <- snd <$> Url.withUrlOptions (Url.exists url)
-	return $ key { keySize = size }
+{- Adds the url size to the Key. -}
+addSizeUrlKey :: Url.UrlInfo -> Key -> Key
+addSizeUrlKey urlinfo key = key { keySize = Url.urlSize urlinfo }
 
 cleanup :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex ()
 cleanup u url file key mtmp = do
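addSizeUrlKey thus stops being an Annex action that re-hits the url: the size
now comes from the UrlInfo its caller fetched once. A GHCi-style check of the
new shape (the UrlInfo value is made up; Key is git-annex's internal type, so
the last step is shown as a comment):

    ghci> let info = UrlInfo True (Just 307200) Nothing
    ghci> urlSize info
    Just 307200
    ghci> -- addSizeUrlKey info key == key { keySize = Just 307200 }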
@@ -295,17 +303,13 @@ cleanup u url file key mtmp = do
 	Annex.Queue.flush
 	maybe noop (moveAnnex key) mtmp
 
-nodownload :: Bool -> URLString -> FilePath -> Annex (Maybe Key)
-nodownload relaxed url file = do
-	(exists, size) <- if relaxed
-		then pure (True, Nothing)
-		else Url.withUrlOptions (Url.exists url)
-	if exists
-		then do
-			key <- Backend.URL.fromUrl url size
+nodownload :: Bool -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
+nodownload relaxed url urlinfo file
+	| Url.urlExists urlinfo = do
+		key <- Backend.URL.fromUrl url (Url.urlSize urlinfo)
 		cleanup webUUID url file key Nothing
 		return (Just key)
-		else do
+	| otherwise = do
 		warning $ "unable to access url: " ++ url
 		return Nothing
 
@@ -144,7 +144,9 @@ performDownload relaxed cache todownload = case location todownload of
 		rundownload url (takeExtension url) $ \f -> do
 			r <- Remote.claimingUrl url
 			if Remote.uuid r == webUUID
-				then maybeToList <$> addUrlFile relaxed url f
+				then do
+					urlinfo <- Url.withUrlOptions (Url.getUrlInfo url)
+					maybeToList <$> addUrlFile relaxed url urlinfo f
 				else do
 					res <- tryNonAsync $ maybe
 						(error $ "unable to checkUrl of " ++ Remote.name r)
@@ -17,6 +17,8 @@ module Utility.Url (
 	check,
 	checkBoth,
 	exists,
+	UrlInfo(..),
+	getUrlInfo,
 	download,
 	downloadQuiet,
 	parseURIRelaxed
@@ -84,18 +86,27 @@ checkBoth url expected_size uo = do
 	v <- check url expected_size uo
 	return (fst v && snd v)
 check :: URLString -> Maybe Integer -> UrlOptions -> IO (Bool, Bool)
-check url expected_size = go <$$> exists url
+check url expected_size = go <$$> getUrlInfo url
   where
-	go (False, _) = (False, False)
-	go (True, Nothing) = (True, True)
-	go (True, s) = case expected_size of
+	go (UrlInfo False _ _) = (False, False)
+	go (UrlInfo True Nothing _) = (True, True)
+	go (UrlInfo True s _) = case expected_size of
 		Just _ -> (True, expected_size == s)
 		Nothing -> (True, True)
 
+exists :: URLString -> UrlOptions -> IO Bool
+exists url uo = urlExists <$> getUrlInfo url uo
+
+data UrlInfo = UrlInfo
+	{ urlExists :: Bool
+	, urlSize :: Maybe Integer
+	, urlSuggestedFile :: Maybe FilePath
+	}
+
 {- Checks that an url exists and could be successfully downloaded,
- - also returning its size if available. -}
-exists :: URLString -> UrlOptions -> IO (Bool, Maybe Integer)
-exists url uo = case parseURIRelaxed url of
+ - also returning its size and suggested filename if available. -}
+getUrlInfo :: URLString -> UrlOptions -> IO UrlInfo
+getUrlInfo url uo = case parseURIRelaxed url of
 	Just u -> case parseUrl (show u) of
 		Just req -> existsconduit req `catchNonAsync` const dne
 		-- http-conduit does not support file:, ftp:, etc urls,
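With the record in place, callers probe a url once and read all three facts
out of it. A hypothetical GHCi session against the new API (the url and
results are made up; def is the module's default UrlOptions, as already used
in the first hunk above):

    ghci> info <- getUrlInfo "http://example.com/download/42" def
    ghci> (urlExists info, urlSize info, urlSuggestedFile info)
    (True,Just 1048576,Just "bigfile.tar.gz")
    ghci> urlExists <$> getUrlInfo "http://example.com/gone" def
    False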
@@ -107,18 +118,21 @@ exists url uo = case parseURIRelaxed url of
 				case s of
 					Just stat -> do
 						sz <- getFileSize' f stat
-						return (True, Just sz)
+						found (Just sz) Nothing
 					Nothing -> dne
 		| Build.SysConfig.curl -> do
 			output <- catchDefaultIO "" $
 				readProcess "curl" $ toCommand curlparams
 			case lastMaybe (lines output) of
-				Just ('2':_:_) -> return (True, extractlencurl output)
+				Just ('2':_:_) -> found
+					(extractlencurl output)
+					Nothing
 				_ -> dne
 		| otherwise -> dne
 	Nothing -> dne
   where
-	dne = return (False, Nothing)
+	dne = return $ UrlInfo False Nothing Nothing
+	found sz f = return $ UrlInfo True sz f
 
 	curlparams = addUserAgent uo $
 		[ Param "-s"
@@ -133,23 +147,36 @@ exists url uo = case parseURIRelaxed url of
 				_ -> Nothing
 		_ -> Nothing
 
-	extractlen resp = readish . B8.toString =<< headMaybe lenheaders
-	  where
-		lenheaders = map snd $
-			filter (\(h, _) -> h == hContentLength)
-			(responseHeaders resp)
+	extractlen = readish . B8.toString <=< firstheader hContentLength
+
+	extractfilename = contentDispositionFilename . B8.toString
+		<=< firstheader hContentDisposition
+
+	firstheader h = headMaybe . map snd .
+		filter (\p -> fst p == h) . responseHeaders
 
 	existsconduit req = withManager $ \mgr -> do
 		let req' = headRequest (applyRequest uo req)
 		resp <- http req' mgr
 		-- forces processing the response before the
 		-- manager is closed
-		ret <- if responseStatus resp == ok200
-			then return (True, extractlen resp)
-			else liftIO dne
+		ret <- liftIO $ if responseStatus resp == ok200
+			then found
				(extractlen resp)
+				(extractfilename resp)
+			else dne
 		liftIO $ closeManager mgr
 		return ret
 
+-- Parse eg: attachment; filename="fname.ext"
+-- per RFC 2616
+contentDispositionFilename :: String -> Maybe FilePath
+contentDispositionFilename s
+	| "attachment; filename=\"" `isPrefixOf` s && "\"" `isSuffixOf` s =
+		Just $ reverse $ drop 1 $ reverse $
+			drop 1 $ dropWhile (/= '"') s
+	| otherwise = Nothing
+
 #if MIN_VERSION_http_conduit(2,0,0)
 headRequest :: Request -> Request
 #else
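The parser is deliberately narrow: it accepts only the quoted
attachment; filename="..." form from RFC 2616 and returns Nothing for
anything else. For instance (a hypothetical GHCi session):

    ghci> contentDispositionFilename "attachment; filename=\"fname.ext\""
    Just "fname.ext"
    ghci> contentDispositionFilename "attachment; filename=fname.ext"
    Nothing
    ghci> contentDispositionFilename "inline"
    Nothing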
@@ -229,6 +256,9 @@ parseURIRelaxed = parseURI . escapeURIString isAllowedInURI
 hAcceptEncoding :: CI.CI B.ByteString
 hAcceptEncoding = "Accept-Encoding"
 
+hContentDisposition :: CI.CI B.ByteString
+hContentDisposition = "Content-Disposition"
+
 #if ! MIN_VERSION_http_types(0,7,0)
 hContentLength :: CI.CI B.ByteString
 hContentLength = "Content-Length"
debian/changelog
@@ -16,6 +16,9 @@ git-annex (5.20150114) UNRELEASED; urgency=medium
   * Avoid using fileSize which maxes out at just 2 gb on Windows.
     Instead, use hFileSize, which doesn't have a bounded size.
     Fixes support for files > 2 gb on Windows.
+  * addurl: When a Content-Disposition header suggests a filename to use,
+    addurl will consider using it, if it's reasonable and doesn't conflict
+    with an existing file. (--file overrides this)
 
  -- Joey Hess <id@joeyh.name>  Tue, 13 Jan 2015 17:03:39 -0400
 
@@ -218,8 +218,10 @@ subdirectories).
   is there at a future point, specify `--relaxed`. (Implies `--fast`.)
 
   Normally the filename is based on the full url, so will look like
-  "www.example.com_dir_subdir_bigfile". For a shorter filename, specify
-  `--pathdepth=N`. For example, `--pathdepth=1` will use "dir/subdir/bigfile",
+  "www.example.com_dir_subdir_bigfile". In some cases, addurl is able to
+  come up with a better filename based on other information. Or, for a
+  shorter filename, specify `--pathdepth=N`. For example,
+  `--pathdepth=1` will use "dir/subdir/bigfile",
   while `--pathdepth=3` will use "bigfile". It can also be negative;
   `--pathdepth=-2` will use the last two parts of the url.
 