importfeed: Support youtube playlists.

This commit is contained in:
Joey Hess 2013-12-29 15:52:20 -04:00
parent 920ce39b76
commit 81f498559a
6 changed files with 101 additions and 45 deletions

View file

@ -98,20 +98,25 @@ performQuvi relaxed pageurl videourl file = ifAnnexed file addurl geturl
where where
quviurl = setDownloader pageurl QuviDownloader quviurl = setDownloader pageurl QuviDownloader
addurl (key, _backend) = next $ cleanup quviurl file key Nothing addurl (key, _backend) = next $ cleanup quviurl file key Nothing
geturl = do geturl = next $ addUrlFileQuvi relaxed quviurl videourl file
key <- Backend.URL.fromUrl quviurl Nothing #endif
ifM (pure relaxed <||> Annex.getState Annex.fast)
( next $ cleanup quviurl file key Nothing #ifdef WITH_QUVI
, do addUrlFileQuvi :: Bool -> URLString -> URLString -> FilePath -> Annex Bool
tmp <- fromRepo $ gitAnnexTmpLocation key addUrlFileQuvi relaxed quviurl videourl file = do
showOutput key <- Backend.URL.fromUrl quviurl Nothing
ok <- Transfer.download webUUID key (Just file) Transfer.forwardRetry $ const $ do ifM (pure relaxed <||> Annex.getState Annex.fast)
liftIO $ createDirectoryIfMissing True (parentDir tmp) ( cleanup quviurl file key Nothing
downloadUrl [videourl] tmp , do
if ok tmp <- fromRepo $ gitAnnexTmpLocation key
then next $ cleanup quviurl file key (Just tmp) showOutput
else stop ok <- Transfer.download webUUID key (Just file) Transfer.forwardRetry $ const $ do
) liftIO $ createDirectoryIfMissing True (parentDir tmp)
downloadUrl [videourl] tmp
if ok
then cleanup quviurl file key (Just tmp)
else return False
)
#endif #endif
perform :: Bool -> URLString -> FilePath -> CommandPerform perform :: Bool -> URLString -> FilePath -> CommandPerform

View file

@ -5,6 +5,8 @@
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
{-# LANGUAGE CPP #-}
module Command.ImportFeed where module Command.ImportFeed where
import Text.Feed.Import import Text.Feed.Import
@ -25,6 +27,11 @@ import Utility.Tmp
import Command.AddUrl (addUrlFile, relaxedOption) import Command.AddUrl (addUrlFile, relaxedOption)
import Annex.Perms import Annex.Perms
import Backend.URL (fromUrl) import Backend.URL (fromUrl)
#ifdef WITH_QUVI
import Annex.Quvi
import qualified Utility.Quvi as Quvi
import Command.AddUrl (addUrlFileQuvi)
#endif
def :: [Command] def :: [Command]
def = [notBareRepo $ withOptions [templateOption, relaxedOption] $ def = [notBareRepo $ withOptions [templateOption, relaxedOption] $
@ -47,16 +54,16 @@ start relaxed cache url = do
perform :: Bool -> Cache -> URLString -> CommandPerform perform :: Bool -> Cache -> URLString -> CommandPerform
perform relaxed cache url = do perform relaxed cache url = do
v <- findEnclosures url v <- findDownloads url
case v of case v of
Just l | not (null l) -> do [] -> do
ok <- and <$> mapM (downloadEnclosure relaxed cache) l feedProblem url "bad feed content"
next $ return True
l -> do
ok <- and <$> mapM (performDownload relaxed cache) l
unless ok $ unless ok $
feedProblem url "problem downloading item" feedProblem url "problem downloading item"
next $ cleanup url True next $ cleanup url True
_ -> do
feedProblem url "bad feed content"
next $ return True
cleanup :: URLString -> Bool -> CommandCleanup cleanup :: URLString -> Bool -> CommandCleanup
cleanup url ok = do cleanup url ok = do
@ -68,13 +75,10 @@ data ToDownload = ToDownload
{ feed :: Feed { feed :: Feed
, feedurl :: URLString , feedurl :: URLString
, item :: Item , item :: Item
, location :: URLString , location :: DownloadLocation
} }
mkToDownload :: Feed -> URLString -> Item -> Maybe ToDownload data DownloadLocation = Enclosure URLString | QuviLink URLString
mkToDownload f u i = case getItemEnclosure i of
Nothing -> Nothing
Just (enclosureurl, _, _) -> Just $ ToDownload f u i enclosureurl
data Cache = Cache data Cache = Cache
{ knownurls :: S.Set URLString { knownurls :: S.Set URLString
@ -92,11 +96,26 @@ getCache opttemplate = ifM (Annex.getState Annex.force)
tmpl = Utility.Format.gen $ fromMaybe defaultTemplate opttemplate tmpl = Utility.Format.gen $ fromMaybe defaultTemplate opttemplate
ret s = return $ Cache s tmpl ret s = return $ Cache s tmpl
findEnclosures :: URLString -> Annex (Maybe [ToDownload]) findDownloads :: URLString -> Annex [ToDownload]
findEnclosures url = extract <$> downloadFeed url findDownloads u = go =<< downloadFeed u
where where
extract Nothing = Nothing go Nothing = pure []
extract (Just f) = Just $ mapMaybe (mkToDownload f url) (feedItems f) go (Just f) = catMaybes <$> mapM (mk f) (feedItems f)
mk f i = case getItemEnclosure i of
Just (enclosureurl, _, _) -> return $
Just $ ToDownload f u i $ Enclosure enclosureurl
Nothing -> mkquvi f i
#ifdef WITH_QUVI
mkquvi f i = case getItemLink i of
Just link -> ifM (liftIO $ Quvi.supported link)
( return $ Just $ ToDownload f u i $ QuviLink link
, return Nothing
)
Nothing -> return Nothing
#else
mkquvi = return Nothing
#endif
{- Feeds change, so a feed download cannot be resumed. -} {- Feeds change, so a feed download cannot be resumed. -}
downloadFeed :: URLString -> Annex (Maybe Feed) downloadFeed :: URLString -> Annex (Maybe Feed)
@ -110,35 +129,54 @@ downloadFeed url = do
, return Nothing , return Nothing
) )
{- Avoids downloading any urls that are already known to be associated performDownload :: Bool -> Cache -> ToDownload -> Annex Bool
- with a file in the annex, unless forced. -} performDownload relaxed cache todownload = case location todownload of
downloadEnclosure :: Bool -> Cache -> ToDownload -> Annex Bool Enclosure url -> checkknown url $
downloadEnclosure relaxed cache enclosure rundownload url (takeExtension url) $
| S.member url (knownurls cache) = ifM forced (go, return True) addUrlFile relaxed url
| otherwise = go QuviLink pageurl -> do
mp <- withQuviOptions Quvi.query [Quvi.quiet, Quvi.httponly] pageurl
case mp of
Nothing -> return False
Just page -> case headMaybe $ Quvi.pageLinks page of
Nothing -> return False
Just link -> do
let quviurl = setDownloader pageurl QuviDownloader
let videourl = Quvi.linkUrl link
checkknown videourl $
rundownload videourl ("." ++ Quvi.linkSuffix link) $
addUrlFileQuvi relaxed quviurl videourl
where where
forced = Annex.getState Annex.force forced = Annex.getState Annex.force
url = location enclosure
go = do {- Avoids downloading any urls that are already known to be
dest <- makeunique (1 :: Integer) $ feedFile (template cache) enclosure - associated with a file in the annex, unless forced. -}
checkknown url a
| S.member url (knownurls cache) = ifM forced (a, return True)
| otherwise = a
rundownload url extension getter = do
dest <- makeunique url (1 :: Integer) $
feedFile (template cache) todownload extension
case dest of case dest of
Nothing -> return True Nothing -> return True
Just f -> do Just f -> do
showStart "addurl" f showStart "addurl" f
ok <- addUrlFile relaxed url f ok <- getter f
if ok if ok
then do then do
showEndOk showEndOk
return True return True
else do else do
showEndFail showEndFail
checkFeedBroken (feedurl enclosure) checkFeedBroken (feedurl todownload)
{- Find a unique filename to save the url to. {- Find a unique filename to save the url to.
- If the file exists, prefixes it with a number. - If the file exists, prefixes it with a number.
- When forced, the file may already exist and have the same - When forced, the file may already exist and have the same
- url, in which case Nothing is returned as it does not need - url, in which case Nothing is returned as it does not need
- to be re-downloaded. -} - to be re-downloaded. -}
makeunique n file = ifM alreadyexists makeunique url n file = ifM alreadyexists
( ifM forced ( ifM forced
( ifAnnexed f checksameurl tryanother ( ifAnnexed f checksameurl tryanother
, tryanother , tryanother
@ -151,7 +189,7 @@ downloadEnclosure relaxed cache enclosure
else else
let (d, base) = splitFileName file let (d, base) = splitFileName file
in d </> show n ++ "_" ++ base in d </> show n ++ "_" ++ base
tryanother = makeunique (n + 1) file tryanother = makeunique url (n + 1) file
alreadyexists = liftIO $ isJust <$> catchMaybeIO (getSymbolicLinkStatus f) alreadyexists = liftIO $ isJust <$> catchMaybeIO (getSymbolicLinkStatus f)
checksameurl (k, _) = ifM (elem url <$> getUrls k) checksameurl (k, _) = ifM (elem url <$> getUrls k)
( return Nothing ( return Nothing
@ -163,8 +201,8 @@ defaultTemplate = "${feedtitle}/${itemtitle}${extension}"
{- Generates a filename to use for a feed item by filling out the template. {- Generates a filename to use for a feed item by filling out the template.
- The filename may not be unique. -} - The filename may not be unique. -}
feedFile :: Utility.Format.Format -> ToDownload -> FilePath feedFile :: Utility.Format.Format -> ToDownload -> String -> FilePath
feedFile tmpl i = Utility.Format.format tmpl $ M.fromList feedFile tmpl i extension = Utility.Format.format tmpl $ M.fromList
[ field "feedtitle" $ getFeedTitle $ feed i [ field "feedtitle" $ getFeedTitle $ feed i
, fieldMaybe "itemtitle" $ getItemTitle $ item i , fieldMaybe "itemtitle" $ getItemTitle $ item i
, fieldMaybe "feedauthor" $ getFeedAuthor $ feed i , fieldMaybe "feedauthor" $ getFeedAuthor $ feed i
@ -173,7 +211,7 @@ feedFile tmpl i = Utility.Format.format tmpl $ M.fromList
, fieldMaybe "itemdescription" $ getItemDescription $ item i , fieldMaybe "itemdescription" $ getItemDescription $ item i
, fieldMaybe "itemrights" $ getItemRights $ item i , fieldMaybe "itemrights" $ getItemRights $ item i
, fieldMaybe "itemid" $ snd <$> getItemId (item i) , fieldMaybe "itemid" $ snd <$> getItemId (item i)
, ("extension", sanitizeFilePath $ takeExtension $ location i) , ("extension", sanitizeFilePath extension)
] ]
where where
field k v = field k v =

1
debian/changelog vendored
View file

@ -14,6 +14,7 @@ git-annex (5.20131222) UNRELEASED; urgency=medium
This also fixes a problem when a direct mode repo was somehow set to v3 This also fixes a problem when a direct mode repo was somehow set to v3
rather than v4, and so the automatic direct mode upgrade to v5 was not rather than v4, and so the automatic direct mode upgrade to v5 was not
done. done.
* importfeed: Support youtube playlists.
-- Joey Hess <joeyh@debian.org> Tue, 24 Dec 2013 13:54:32 -0400 -- Joey Hess <joeyh@debian.org> Tue, 24 Dec 2013 13:54:32 -0400

View file

@ -258,6 +258,10 @@ subdirectories).
(Other available variables: feedauthor, itemauthor, itemsummary, itemdescription, itemrights, itemid) (Other available variables: feedauthor, itemauthor, itemsummary, itemdescription, itemrights, itemid)
The `--relaxed` and `--fast` options behave the same as they do in addurl. The `--relaxed` and `--fast` options behave the same as they do in addurl.
When quvi is installed, links in the feed are tested to see if they
are on a video hosting site, and the video is downloaded. This allows
importing eg, youtube playlists.
* `watch` * `watch`

View file

@ -61,3 +61,9 @@ time-delayed deletion of upstream content. You can also work around slow
downloads upstream by podcatching to a server with ample bandwidth or work downloads upstream by podcatching to a server with ample bandwidth or work
around a slow local Internet connection by podcatching to your home server around a slow local Internet connection by podcatching to your home server
and transferring to your laptop on demand. and transferring to your laptop on demand.
## youtube playlists
If your git-annex is also built with quvi support, you can also use
`git annex importfeed` on youtube playlists. It will automatically download
the videos linked to by the playlist.

View file

@ -26,3 +26,5 @@ It would be great if this functionality could be integrated directly into git an
Best Best
Karsten Karsten
> [[done]] --[[Joey]]