importfeed: Support -J (and work toward supporting --json)
Both -J and --json needed importfeed to be refactored to use commandAction.
That was difficult, because of the interrelated nature of downloading feeds
and then downloading files from feeds, both of which needed to use
commandAction. And then checking for problems in feeds has to come after
these actions, which may be run as background jobs.

As for --json support, it's most of the way there, but still has some warts,
so I didn't enable jsonOptions yet. The warts include:

- An initial empty json record is displayed by getCache.
- Input is not populated, should be feed url
- feedProblem at end will not be captured by --json-error-messages
  (see FIXME)

Sponsored-By: the NIH-funded NICEMAN (ReproNim TR&D3) project
parent a71c831949
commit 04ee6c4c6b
5 changed files with 247 additions and 146 deletions
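For reference, a simplified, self-contained sketch of the coordination the commit message describes, written with plain IO plus the async and stm packages rather than git-annex's Annex monad, commandAction, or worker pool; every name and stand-in function below is illustrative, not git-annex's. Unlike the real code in this commit, which hands items off for download as soon as each feed finishes, this version waits for all feeds first; the part it keeps is that the per-feed problem check can only run after every background job has recorded an outcome in the shared map.

import Control.Concurrent.Async (forConcurrently_, mapConcurrently)
import Control.Concurrent.STM
import Control.Monad (forM, forM_, unless)
import qualified Data.Map as M

type Url = String
newtype FeedItem = FeedItem Url

-- Stand-ins for the real feed and item downloads.
fetchFeed :: Url -> IO (Maybe [FeedItem])   -- Nothing means the feed failed
fetchFeed u = return (Just [FeedItem (u ++ "#item1")])

fetchItem :: FeedItem -> IO Bool            -- False means the item failed
fetchItem _ = return True

importFeeds :: [Url] -> IO ()
importFeeds urls = do
	dlst <- newTVarIO M.empty
	-- Feed downloads run as background jobs, each recording its outcome.
	forConcurrently_ urls $ \u -> do
		r <- fetchFeed u
		atomically $ modifyTVar' dlst (M.insert u r)
	-- Only after those jobs are done: download the items that were found.
	m <- readTVarIO dlst
	results <- forM (M.toList m) $ \(u, v) -> case v of
		Nothing -> return (u, False)
		Just is -> do
			oks <- mapConcurrently fetchItem is
			return (u, and oks)
	-- The per-feed problem check has to come last of all.
	forM_ results $ \(u, ok) ->
		unless ok $
			putStrLn ("problem downloading some item(s) from feed: " ++ u)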
@@ -46,6 +46,7 @@ git-annex (10.20230408) UNRELEASED; urgency=medium
     before.
   * addunused: Displays the names of the files that it adds.
   * reinject: Fix support for operating on multiple pairs of files and keys.
+  * importfeed: Support -J
 
  -- Joey Hess <id@joeyh.name>  Sat, 08 Apr 2023 13:57:18 -0400
 
@@ -286,7 +286,7 @@ performWeb addunlockedmatcher o url file urlinfo = lookupKey file >>= \case
 	addurl = addUrlChecked o url file webUUID $ \k ->
 		ifM (pure (not (rawOption (downloadOptions o))) <&&> youtubeDlSupported url)
 			( return (Just (True, True, setDownloader url YoutubeDownloader))
-			, checkRaw Nothing (downloadOptions o) Nothing $
+			, checkRaw Nothing (downloadOptions o) (pure Nothing) $
 				return (Just (Url.urlExists urlinfo, Url.urlSize urlinfo == fromKey keySize k, url))
 			)
 
@@ -348,7 +348,7 @@ downloadWeb addunlockedmatcher o url urlinfo file =
 		in lookupKey f >>= \case
 			Just k -> alreadyannexed f k
 			Nothing -> dl f
-	Left err -> checkRaw (Just err) o Nothing (normalfinish tmp backend)
+	Left err -> checkRaw (Just err) o (pure Nothing) (normalfinish tmp backend)
   where
 	dl dest = withTmpWorkDir mediakey $ \workdir -> do
 		let cleanuptmp = pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith R.removeLink)
@@ -362,7 +362,7 @@ downloadWeb addunlockedmatcher o url urlinfo file =
 				showDestinationFile dest
 				addWorkTree canadd addunlockedmatcher webUUID mediaurl dest mediakey (Just (toRawFilePath mediafile))
 				return $ Just mediakey
-			Right Nothing -> checkRaw Nothing o Nothing (normalfinish tmp backend)
+			Right Nothing -> checkRaw Nothing o (pure Nothing) (normalfinish tmp backend)
 			Left msg -> do
 				cleanuptmp
 				warning (UnquotedString msg)
@@ -379,14 +379,14 @@ downloadWeb addunlockedmatcher o url urlinfo file =
 			warning $ QuotedPath dest <> " already exists; not overwriting"
 			return Nothing
 
-checkRaw :: (Maybe String) -> DownloadOptions -> a -> Annex a -> Annex a
+checkRaw :: (Maybe String) -> DownloadOptions -> Annex a -> Annex a -> Annex a
 checkRaw failreason o f a
 	| noRawOption o = do
 		warning $ UnquotedString $ "Unable to use youtube-dl or a special remote and --no-raw was specified" ++
 			case failreason of
 				Just msg -> ": " ++ msg
 				Nothing -> ""
-		return f
+		f
 	| otherwise = a
 
 {- The destination file is not known at start time unless the user provided
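The checkRaw change above turns the fallback argument from a plain value into an Annex action, so callers can pass fallbacks that have effects of their own (the new call sites pass things like pure Nothing, and importfeed can pass a CommandStart-style stop). A minimal standalone sketch of that kind of signature change, with illustrative names rather than git-annex's types:

-- Before: the fallback was a value, returned as-is when --no-raw refuses.
checkRawLikeOld :: Monad m => Bool -> a -> m a -> m a
checkRawLikeOld refused fallback action
	| refused = return fallback
	| otherwise = action

-- After: the fallback is itself an action, run when --no-raw refuses.
checkRawLikeNew :: Monad m => Bool -> m a -> m a -> m a
checkRawLikeNew refused fallback action
	| refused = fallback
	| otherwise = action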
@@ -504,7 +504,7 @@ nodownloadWeb addunlockedmatcher o url urlinfo file
 		then nomedia
 		else youtubeDlFileName url >>= \case
 			Right mediafile -> usemedia (toRawFilePath mediafile)
-			Left err -> checkRaw (Just err) o Nothing nomedia
+			Left err -> checkRaw (Just err) o (pure Nothing) nomedia
 	| otherwise = do
 		warning $ UnquotedString $ "unable to access url: " ++ url
 		return Nothing
@@ -1,6 +1,6 @@
 {- git-annex command
  -
- - Copyright 2013-2021 Joey Hess <id@joeyh.name>
+ - Copyright 2013-2023 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU AGPL version 3 or higher.
  -}
@@ -20,6 +20,7 @@ import Data.Time.Clock
 import Data.Time.Format
 import Data.Time.Calendar
 import Data.Time.LocalTime
+import Control.Concurrent.STM
 import qualified Data.Text as T
 import qualified Data.Text.Encoding as TE
 import qualified System.FilePath.ByteString as P
@@ -53,7 +54,7 @@ import Logs
 import qualified Utility.RawFilePath as R
 
 cmd :: Command
-cmd = notBareRepo $ withAnnexOptions [backendOption] $
+cmd = notBareRepo $ withAnnexOptions [jobsOption, backendOption] $
 	command "importfeed" SectionCommon "import files from podcast feeds"
 		(paramRepeating paramUrl) (seek <$$> optParser)
 
@@ -73,25 +74,96 @@ optParser desc = ImportFeedOptions
 	<*> parseDownloadOptions False
 
 seek :: ImportFeedOptions -> CommandSeek
-seek o = do
+seek o = startConcurrency commandStages $ do
 	addunlockedmatcher <- addUnlockedMatcher
 	cache <- getCache (templateOption o)
-	forM_ (feedUrls o) (getFeed addunlockedmatcher o cache)
+	dlst <- liftIO $ newTMVarIO M.empty
+	checkst <- liftIO $ newTVarIO M.empty
 
-getFeed :: AddUnlockedMatcher -> ImportFeedOptions -> Cache -> URLString -> CommandSeek
-getFeed addunlockedmatcher opts cache url = do
-	showStartMessage (StartMessage "importfeed" (ActionItemOther (Just (UnquotedString url))) (SeekInput []))
-	withTmpFile "feed" $ \tmpf h -> do
+	forM_ (feedUrls o) $ \url -> do
+		liftIO $ atomically $ do
+			m <- takeTMVar dlst
+			putTMVar dlst (M.insert url Nothing m)
+		commandAction $ getFeed url dlst
+		startpendingdownloads addunlockedmatcher cache dlst checkst False
+
+	startpendingdownloads addunlockedmatcher cache dlst checkst True
+
+	checkfeedproblems checkst
+  where
+	getpendingdownloads dlst blocking
+		| blocking = do
+			m <- takeTMVar dlst
+			if M.null m
+				then do
+					putTMVar dlst m
+					return m
+				else
+					let (pending, rest) = M.partition ispending m
+					in if M.null pending
+						then retry
+						else do
+							putTMVar dlst rest
+							return pending
+		| otherwise = do
+			m <- takeTMVar dlst
+			let (pending, rest) = M.partition ispending m
+			putTMVar dlst rest
+			return pending
+	  where
+		ispending Nothing = False
+		ispending (Just _) = True
+
+	startpendingdownloads addunlockedmatcher cache dlst checkst blocking = do
+		m <- liftIO $ atomically $ getpendingdownloads dlst blocking
+
+		forM_ (M.toList m) $ \(url, v) -> case v of
+			Nothing -> noop
+			Just Nothing -> noop
+			Just (Just is) ->
+				forM_ is $ \i -> do
+					cv <- liftIO newEmptyTMVarIO
+					liftIO $ atomically $ modifyTVar checkst $
+						M.insertWith (++) url [cv]
+					commandAction $
+						startDownload addunlockedmatcher o cache cv i
+
+	checkfeedproblems checkst = do
+		m <- liftIO $ atomically $ readTVar checkst
+		forM_ (M.toList m) $ \(url, cvl) ->
+			ifM (and <$> mapM (liftIO . atomically . takeTMVar) cvl)
+				( clearFeedProblem url
+				-- FIXME: This will not be captured in json
+				, void $ feedProblem url
+					"problem downloading some item(s) from feed"
+				)
+
+getFeed
+	:: URLString
+	-> TMVar (M.Map URLString (Maybe (Maybe [ToDownload])))
+	-> CommandStart
+getFeed url st =
+	starting "importfeed" (ActionItemOther (Just (UnquotedString url))) (SeekInput []) $
+		get `onException` recordfail
+  where
+	record v = liftIO $ atomically $ do
+		m <- takeTMVar st
+		putTMVar st (M.insert url v m)
+	recordfail = record (Just Nothing)
+
+	get = withTmpFile "feed" $ \tmpf h -> do
 		liftIO $ hClose h
 		ifM (downloadFeed url tmpf)
-			( go tmpf
-			, showEndResult =<< feedProblem url
-				"downloading the feed failed"
+			( parse tmpf
+			, do
+				recordfail
+				next $ feedProblem url
+					"downloading the feed failed"
 			)
-  where
+
 	-- Use parseFeedFromFile rather than reading the file
 	-- ourselves because it goes out of its way to handle encodings.
-	go tmpf = liftIO (parseFeedFromFile' tmpf) >>= \case
+	parse tmpf = liftIO (parseFeedFromFile' tmpf) >>= \case
 		Nothing -> debugfeedcontent tmpf "parsing the feed failed"
 		Just f -> do
 			case decodeBS $ fromFeedText $ getFeedTitle f of
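A standalone sketch of the drain logic in getpendingdownloads above, using only the stm package; the names are illustrative. Map entries still at Nothing belong to feed jobs that have not finished yet, Just entries are parsed feeds ready to hand off, and in blocking mode retry makes the transaction wait until some job completes (unless the map is already empty).

import Control.Concurrent.STM
import qualified Data.Map as M
import Data.Maybe (isJust)

drainFinished :: Bool -> TMVar (M.Map k (Maybe v)) -> STM (M.Map k (Maybe v))
drainFinished blocking var = do
	m <- takeTMVar var
	let (finished, stillrunning) = M.partition isJust m
	if blocking && not (M.null m) && M.null finished
		-- Nothing has completed yet: abort the transaction and rerun it
		-- automatically once the TMVar is written again.
		then retry
		else do
			putTMVar var stillrunning
			return finished

A caller would use atomically (drainFinished False dlst) to collect whatever is ready without waiting, and atomically (drainFinished True dlst) for the final blocking round.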
@@ -100,12 +172,9 @@ getFeed addunlockedmatcher opts cache url = do
 			case findDownloads url f of
 				[] -> debugfeedcontent tmpf "bad feed content; no enclosures to download"
 				l -> do
-					showEndOk
-					ifM (and <$> mapM (performDownload addunlockedmatcher opts cache) l)
-						( clearFeedProblem url
-						, void $ feedProblem url
-							"problem downloading some item(s) from feed"
-						)
+					record (Just (Just l))
+					next $ return True
 	debugfeedcontent tmpf msg = do
 		feedcontent <- liftIO $ readFile tmpf
 		fastDebug "Command.ImportFeed" $ unlines
@@ -113,7 +182,8 @@ getFeed addunlockedmatcher opts cache url = do
 			, feedcontent
 			, "end of feed content"
 			]
-		showEndResult =<< feedProblem url
+		recordfail
+		next $ feedProblem url
 			(msg ++ " (use --debug --debugfilter=ImportFeed to see the feed content that was downloaded)")
 
 parseFeedFromFile' :: FilePath -> IO (Maybe Feed)
@@ -199,51 +269,9 @@ downloadFeed url f
 	| otherwise = Url.withUrlOptions $
 		Url.download nullMeterUpdate Nothing url f
 
-performDownload :: AddUnlockedMatcher -> ImportFeedOptions -> Cache -> ToDownload -> Annex Bool
-performDownload = performDownload' False
-
-performDownload' :: Bool -> AddUnlockedMatcher -> ImportFeedOptions -> Cache -> ToDownload -> Annex Bool
-performDownload' started addunlockedmatcher opts cache todownload = case location todownload of
-	Enclosure url -> checkknown url $ do
-		starturl url
-		rundownload url (takeWhile (/= '?') $ takeExtension url) $ \f -> do
-			let f' = fromRawFilePath f
-			r <- Remote.claimingUrl url
-			if Remote.uuid r == webUUID || rawOption (downloadOptions opts)
-				then checkRaw (Just url) (downloadOptions opts) Nothing $ do
-					let dlopts = (downloadOptions opts)
-						-- force using the filename
-						-- chosen here
-						{ fileOption = Just f'
-						-- don't use youtube-dl
-						, rawOption = True
-						}
-					let go urlinfo = Just . maybeToList <$> addUrlFile addunlockedmatcher dlopts url urlinfo f
-					if relaxedOption (downloadOptions opts)
-						then go Url.assumeUrlExists
-						else Url.withUrlOptions (Url.getUrlInfo url) >>= \case
-							Right urlinfo -> go urlinfo
-							Left err -> do
-								warning (UnquotedString err)
-								return (Just [])
-				else do
-					res <- tryNonAsync $ maybe
-						(giveup $ "unable to checkUrl of " ++ Remote.name r)
-						(flip id url)
-						(Remote.checkUrl r)
-					case res of
-						Left _ -> return (Just [])
-						Right (UrlContents sz _) ->
-							Just . maybeToList <$>
-								downloadRemoteFile addunlockedmatcher r (downloadOptions opts) url f sz
-						Right (UrlMulti l) -> do
-							kl <- forM l $ \(url', sz, subf) ->
-								let dest = f P.</> toRawFilePath (sanitizeFilePath subf)
-								in downloadRemoteFile addunlockedmatcher r (downloadOptions opts) url' dest sz
-							return $ Just $ if all isJust kl
-								then catMaybes kl
-								else []
+startDownload :: AddUnlockedMatcher -> ImportFeedOptions -> Cache -> TMVar Bool -> ToDownload -> CommandStart
+startDownload addunlockedmatcher opts cache cv todownload = case location todownload of
+	Enclosure url -> startdownloadenclosure url
 
 	MediaLink linkurl -> do
 		let mediaurl = setDownloader linkurl YoutubeDownloader
 		let mediakey = Backend.URL.fromUrl mediaurl Nothing
@@ -263,45 +291,149 @@ performDownload' started addunlockedmatcher opts cache todownload = case locatio
 	 - associated with a file in the annex, unless forced. -}
 	checkknown url a
 		| knownitemid || S.member url (knownurls cache)
-			= ifM forced (a, return True)
+			= ifM forced (a, nothingtodo)
 		| otherwise = a
 
+	nothingtodo = recordsuccess >> stop
+
+	recordsuccess = liftIO $ atomically $ putTMVar cv True
+
+	startdownloadenclosure :: URLString -> CommandStart
+	startdownloadenclosure url = checkknown url $ startUrlDownload cv url $
+		downloadEnclosure addunlockedmatcher opts cache cv todownload url
+
 	knownitemid = case getItemId (item todownload) of
 		Just (_, itemid) ->
 			S.member (decodeBS $ fromFeedText itemid) (knownitems cache)
 		_ -> False
 
-	rundownload url extension getter = do
-		dest <- makeunique url (1 :: Integer) $
-			feedFile (template cache) todownload extension
-		case dest of
-			Nothing -> return True
-			Just f -> getter (toRawFilePath f) >>= \case
-				Just ks
-					-- Download problem.
-					| null ks -> do
-						showEndFail
-						checkFeedBroken (feedurl todownload)
-					| otherwise -> do
-						forM_ ks $ \key ->
-							ifM (annexGenMetaData <$> Annex.getGitConfig)
-								( addMetaData key $ extractMetaData todownload
-								, addMetaData key $ minimalMetaData todownload
-								)
-						showEndOk
-						return True
-				-- Was not able to add anything, but not
-				-- because of a download problem.
-				Nothing -> do
-					showEndFail
-					return False
+	downloadmedia linkurl mediaurl mediakey
+		| rawOption (downloadOptions opts) = startdownloadlink
+		| otherwise = ifM (youtubeDlSupported linkurl)
+			( startUrlDownload cv linkurl $
+				withTmpWorkDir mediakey $ \workdir -> do
+					dl <- youtubeDl linkurl (fromRawFilePath workdir) nullMeterUpdate
+					case dl of
+						Right (Just mediafile) -> do
+							let ext = case takeExtension mediafile of
+								[] -> ".m"
+								s -> s
+							runDownload todownload linkurl ext cache cv $ \f ->
+								checkCanAdd (downloadOptions opts) f $ \canadd -> do
+									addWorkTree canadd addunlockedmatcher webUUID mediaurl f mediakey (Just (toRawFilePath mediafile))
+									return (Just [mediakey])
+						-- youtube-dl didn't support it, so
+						-- download it as if the link were
+						-- an enclosure.
+						Right Nothing -> contdownloadlink
+						Left msg -> do
+							warning $ UnquotedString $ linkurl ++ ": " ++ msg
+							liftIO $ atomically $ putTMVar cv False
+							next $ return False
+			, startdownloadlink
+			)
+	  where
+		startdownloadlink = checkRaw (Just linkurl) (downloadOptions opts) nothingtodo $
+			startdownloadenclosure linkurl
+		contdownloadlink = downloadEnclosure addunlockedmatcher opts cache cv todownload linkurl
+
+	addmediafast linkurl mediaurl mediakey =
+		ifM (pure (not (rawOption (downloadOptions opts)))
+			<&&> youtubeDlSupported linkurl)
+			( startUrlDownload cv linkurl $ do
+				runDownload todownload linkurl ".m" cache cv $ \f ->
+					checkCanAdd (downloadOptions opts) f $ \canadd -> do
+						addWorkTree canadd addunlockedmatcher webUUID mediaurl f mediakey Nothing
+						return (Just [mediakey])
+			, startdownloadenclosure linkurl
+			)
+
+downloadEnclosure :: AddUnlockedMatcher -> ImportFeedOptions -> Cache -> TMVar Bool -> ToDownload -> URLString -> CommandPerform
+downloadEnclosure addunlockedmatcher opts cache cv todownload url =
+	runDownload todownload url (takeWhile (/= '?') $ takeExtension url) cache cv $ \f -> do
+		let f' = fromRawFilePath f
+		r <- Remote.claimingUrl url
+		if Remote.uuid r == webUUID || rawOption (downloadOptions opts)
+			then checkRaw (Just url) (downloadOptions opts) (pure Nothing) $ do
+				let dlopts = (downloadOptions opts)
+					-- force using the filename
+					-- chosen here
+					{ fileOption = Just f'
+					-- don't use youtube-dl
+					, rawOption = True
+					}
+				let go urlinfo = Just . maybeToList <$> addUrlFile addunlockedmatcher dlopts url urlinfo f
+				if relaxedOption (downloadOptions opts)
+					then go Url.assumeUrlExists
+					else Url.withUrlOptions (Url.getUrlInfo url) >>= \case
+						Right urlinfo -> go urlinfo
+						Left err -> do
+							warning (UnquotedString err)
+							return (Just [])
+			else do
+				res <- tryNonAsync $ maybe
+					(giveup $ "unable to checkUrl of " ++ Remote.name r)
+					(flip id url)
+					(Remote.checkUrl r)
+				case res of
+					Left _ -> return (Just [])
+					Right (UrlContents sz _) ->
+						Just . maybeToList <$>
+							downloadRemoteFile addunlockedmatcher r (downloadOptions opts) url f sz
+					Right (UrlMulti l) -> do
+						kl <- forM l $ \(url', sz, subf) ->
+							let dest = f P.</> toRawFilePath (sanitizeFilePath subf)
+							in downloadRemoteFile addunlockedmatcher r (downloadOptions opts) url' dest sz
+						return $ Just $ if all isJust kl
+							then catMaybes kl
+							else []
+
+runDownload
+	:: ToDownload
+	-> URLString
+	-> String
+	-> Cache
+	-> TMVar Bool
+	-> (RawFilePath -> Annex (Maybe [Key]))
+	-> CommandPerform
+runDownload todownload url extension cache cv getter = do
+	dest <- makeunique (1 :: Integer) $
+		feedFile (template cache) todownload extension
+	case dest of
+		Nothing -> do
+			recordsuccess
+			stop
+		Just f -> getter (toRawFilePath f) >>= \case
+			Just ks
+				-- Download problem.
+				| null ks -> do
+					liftIO . atomically . putTMVar cv
+						=<< checkFeedBroken (feedurl todownload)
+					next $ return False
+				| otherwise -> do
+					forM_ ks $ \key ->
+						ifM (annexGenMetaData <$> Annex.getGitConfig)
+							( addMetaData key $ extractMetaData todownload
+							, addMetaData key $ minimalMetaData todownload
+							)
+					recordsuccess
+					next $ return True
+			-- Was not able to add anything, but not
+			-- because of a download problem.
+			Nothing -> do
+				recordsuccess
+				next $ return False
+  where
+	recordsuccess = liftIO $ atomically $ putTMVar cv True
+
+	forced = Annex.getRead Annex.force
 
 	{- Find a unique filename to save the url to.
 	 - If the file exists, prefixes it with a number.
 	 - When forced, the file may already exist and have the same
 	 - url, in which case Nothing is returned as it does not need
 	 - to be re-downloaded. -}
-	makeunique url n file = ifM alreadyexists
+	makeunique n file = ifM alreadyexists
 		( ifM forced
 			( lookupKey (toRawFilePath f) >>= \case
 				Just k -> checksameurl k
@@ -316,60 +448,20 @@ performDownload' started addunlockedmatcher opts cache todownload = case locatio
 			else
 				let (d, base) = splitFileName file
 				in d </> show n ++ "_" ++ base
-		tryanother = makeunique url (n + 1) file
+		tryanother = makeunique (n + 1) file
 		alreadyexists = liftIO $ isJust <$> catchMaybeIO (R.getSymbolicLinkStatus (toRawFilePath f))
 		checksameurl k = ifM (elem url <$> getUrls k)
 			( return Nothing
 			, tryanother
 			)
 
-	downloadmedia linkurl mediaurl mediakey
-		| rawOption (downloadOptions opts) = downloadlink False
-		| otherwise = ifM (youtubeDlSupported linkurl)
-			( do
-				starturl linkurl
-				r <- withTmpWorkDir mediakey $ \workdir -> do
-					dl <- youtubeDl linkurl (fromRawFilePath workdir) nullMeterUpdate
-					case dl of
-						Right (Just mediafile) -> do
-							let ext = case takeExtension mediafile of
-								[] -> ".m"
-								s -> s
-							ok <- rundownload linkurl ext $ \f ->
-								checkCanAdd (downloadOptions opts) f $ \canadd -> do
-									addWorkTree canadd addunlockedmatcher webUUID mediaurl f mediakey (Just (toRawFilePath mediafile))
-									return (Just [mediakey])
-							return (Just ok)
-						-- youtube-dl didn't support it, so
-						-- download it as if the link were
-						-- an enclosure.
-						Right Nothing -> Just <$> downloadlink True
-						Left msg -> do
-							warning $ UnquotedString $ linkurl ++ ": " ++ msg
-							return Nothing
-				return (fromMaybe False r)
-			, downloadlink False
-			)
-	  where
-		downloadlink started' = checkRaw (Just linkurl) (downloadOptions opts) False $
-			performDownload' started' addunlockedmatcher opts cache todownload
-				{ location = Enclosure linkurl }
-
-	addmediafast linkurl mediaurl mediakey =
-		ifM (pure (not (rawOption (downloadOptions opts)))
-			<&&> youtubeDlSupported linkurl)
-			( do
-				starturl linkurl
-				rundownload linkurl ".m" $ \f ->
-					checkCanAdd (downloadOptions opts) f $ \canadd -> do
-						addWorkTree canadd addunlockedmatcher webUUID mediaurl f mediakey Nothing
-						return (Just [mediakey])
-			, performDownload' started addunlockedmatcher opts cache todownload
-				{ location = Enclosure linkurl }
-			)
-
-	starturl u = unless started $
-		showStartMessage (StartMessage "addurl" (ActionItemOther (Just (UnquotedString u))) (SeekInput []))
+startUrlDownload :: TMVar Bool -> URLString -> CommandPerform -> CommandStart
+startUrlDownload cv u a = starting "addurl"
+	(ActionItemOther (Just (UnquotedString u)))
+	(SeekInput [])
+	(a `onException` recordfailure)
+  where
+	recordfailure = liftIO $ atomically $ tryPutTMVar cv False
 
 defaultTemplate :: String
 defaultTemplate = "${feedtitle}/${itemtitle}${extension}"
|
||||||
warning $ UnquotedString $ message ++ " (having repeated problems with feed: " ++ url ++ ")"
|
warning $ UnquotedString $ message ++ " (having repeated problems with feed: " ++ url ++ ")"
|
||||||
return False
|
return False
|
||||||
, do
|
, do
|
||||||
warning $ UnquotedString $ "warning: " ++ message
|
warning $ UnquotedString $ "warning: " ++ message ++ " (feed: " ++ url ++ ")"
|
||||||
return True
|
return True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@@ -102,6 +102,12 @@ resulting in the new url being downloaded to such a filename.
   url to a file that would be ignored. This makes such files be added
   despite any ignores.
 
+* `--jobs=N` `-JN`
+
+  Runs multiple downloads parallel. For example: `-J4`
+
+  Setting this to "cpus" will run one job per CPU core.
+
 * `--backend`
 
   Specifies which key-value backend to use.
@@ -1,3 +1,5 @@
 Make git-annex importfeed support -J. Ideally, when run with multiple feed
 utls, it would check them in parallel, and also parallelize download of new
 items found in the feeds. --[[Joey]]
+
+> [[done]] --[[Joey]]