importfeed: Support youtube playlists.

This commit is contained in:
Joey Hess 2013-12-29 15:52:20 -04:00
parent 920ce39b76
commit 81f498559a
6 changed files with 101 additions and 45 deletions

View file

@ -98,20 +98,25 @@ performQuvi relaxed pageurl videourl file = ifAnnexed file addurl geturl
where
quviurl = setDownloader pageurl QuviDownloader
addurl (key, _backend) = next $ cleanup quviurl file key Nothing
geturl = do
key <- Backend.URL.fromUrl quviurl Nothing
ifM (pure relaxed <||> Annex.getState Annex.fast)
( next $ cleanup quviurl file key Nothing
, do
tmp <- fromRepo $ gitAnnexTmpLocation key
showOutput
ok <- Transfer.download webUUID key (Just file) Transfer.forwardRetry $ const $ do
liftIO $ createDirectoryIfMissing True (parentDir tmp)
downloadUrl [videourl] tmp
if ok
then next $ cleanup quviurl file key (Just tmp)
else stop
)
geturl = next $ addUrlFileQuvi relaxed quviurl videourl file
#endif
#ifdef WITH_QUVI
addUrlFileQuvi :: Bool -> URLString -> URLString -> FilePath -> Annex Bool
addUrlFileQuvi relaxed quviurl videourl file = do
key <- Backend.URL.fromUrl quviurl Nothing
ifM (pure relaxed <||> Annex.getState Annex.fast)
( cleanup quviurl file key Nothing
, do
tmp <- fromRepo $ gitAnnexTmpLocation key
showOutput
ok <- Transfer.download webUUID key (Just file) Transfer.forwardRetry $ const $ do
liftIO $ createDirectoryIfMissing True (parentDir tmp)
downloadUrl [videourl] tmp
if ok
then cleanup quviurl file key (Just tmp)
else return False
)
#endif
perform :: Bool -> URLString -> FilePath -> CommandPerform

View file

@ -5,6 +5,8 @@
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Command.ImportFeed where
import Text.Feed.Import
@ -25,6 +27,11 @@ import Utility.Tmp
import Command.AddUrl (addUrlFile, relaxedOption)
import Annex.Perms
import Backend.URL (fromUrl)
#ifdef WITH_QUVI
import Annex.Quvi
import qualified Utility.Quvi as Quvi
import Command.AddUrl (addUrlFileQuvi)
#endif
def :: [Command]
def = [notBareRepo $ withOptions [templateOption, relaxedOption] $
@ -47,16 +54,16 @@ start relaxed cache url = do
perform :: Bool -> Cache -> URLString -> CommandPerform
perform relaxed cache url = do
v <- findEnclosures url
v <- findDownloads url
case v of
Just l | not (null l) -> do
ok <- and <$> mapM (downloadEnclosure relaxed cache) l
[] -> do
feedProblem url "bad feed content"
next $ return True
l -> do
ok <- and <$> mapM (performDownload relaxed cache) l
unless ok $
feedProblem url "problem downloading item"
next $ cleanup url True
_ -> do
feedProblem url "bad feed content"
next $ return True
cleanup :: URLString -> Bool -> CommandCleanup
cleanup url ok = do
@ -68,13 +75,10 @@ data ToDownload = ToDownload
{ feed :: Feed
, feedurl :: URLString
, item :: Item
, location :: URLString
, location :: DownloadLocation
}
mkToDownload :: Feed -> URLString -> Item -> Maybe ToDownload
mkToDownload f u i = case getItemEnclosure i of
Nothing -> Nothing
Just (enclosureurl, _, _) -> Just $ ToDownload f u i enclosureurl
data DownloadLocation = Enclosure URLString | QuviLink URLString
data Cache = Cache
{ knownurls :: S.Set URLString
@ -92,11 +96,26 @@ getCache opttemplate = ifM (Annex.getState Annex.force)
tmpl = Utility.Format.gen $ fromMaybe defaultTemplate opttemplate
ret s = return $ Cache s tmpl
findEnclosures :: URLString -> Annex (Maybe [ToDownload])
findEnclosures url = extract <$> downloadFeed url
findDownloads :: URLString -> Annex [ToDownload]
findDownloads u = go =<< downloadFeed u
where
extract Nothing = Nothing
extract (Just f) = Just $ mapMaybe (mkToDownload f url) (feedItems f)
go Nothing = pure []
go (Just f) = catMaybes <$> mapM (mk f) (feedItems f)
mk f i = case getItemEnclosure i of
Just (enclosureurl, _, _) -> return $
Just $ ToDownload f u i $ Enclosure enclosureurl
Nothing -> mkquvi f i
#ifdef WITH_QUVI
mkquvi f i = case getItemLink i of
Just link -> ifM (liftIO $ Quvi.supported link)
( return $ Just $ ToDownload f u i $ QuviLink link
, return Nothing
)
Nothing -> return Nothing
#else
mkquvi = return Nothing
#endif
{- Feeds change, so a feed download cannot be resumed. -}
downloadFeed :: URLString -> Annex (Maybe Feed)
@ -110,35 +129,54 @@ downloadFeed url = do
, return Nothing
)
{- Avoids downloading any urls that are already known to be associated
- with a file in the annex, unless forced. -}
downloadEnclosure :: Bool -> Cache -> ToDownload -> Annex Bool
downloadEnclosure relaxed cache enclosure
| S.member url (knownurls cache) = ifM forced (go, return True)
| otherwise = go
performDownload :: Bool -> Cache -> ToDownload -> Annex Bool
performDownload relaxed cache todownload = case location todownload of
Enclosure url -> checkknown url $
rundownload url (takeExtension url) $
addUrlFile relaxed url
QuviLink pageurl -> do
mp <- withQuviOptions Quvi.query [Quvi.quiet, Quvi.httponly] pageurl
case mp of
Nothing -> return False
Just page -> case headMaybe $ Quvi.pageLinks page of
Nothing -> return False
Just link -> do
let quviurl = setDownloader pageurl QuviDownloader
let videourl = Quvi.linkUrl link
checkknown videourl $
rundownload videourl ("." ++ Quvi.linkSuffix link) $
addUrlFileQuvi relaxed quviurl videourl
where
forced = Annex.getState Annex.force
url = location enclosure
go = do
dest <- makeunique (1 :: Integer) $ feedFile (template cache) enclosure
{- Avoids downloading any urls that are already known to be
- associated with a file in the annex, unless forced. -}
checkknown url a
| S.member url (knownurls cache) = ifM forced (a, return True)
| otherwise = a
rundownload url extension getter = do
dest <- makeunique url (1 :: Integer) $
feedFile (template cache) todownload extension
case dest of
Nothing -> return True
Just f -> do
showStart "addurl" f
ok <- addUrlFile relaxed url f
ok <- getter f
if ok
then do
showEndOk
return True
else do
showEndFail
checkFeedBroken (feedurl enclosure)
checkFeedBroken (feedurl todownload)
{- Find a unique filename to save the url to.
- If the file exists, prefixes it with a number.
- When forced, the file may already exist and have the same
- url, in which case Nothing is returned as it does not need
- to be re-downloaded. -}
makeunique n file = ifM alreadyexists
makeunique url n file = ifM alreadyexists
( ifM forced
( ifAnnexed f checksameurl tryanother
, tryanother
@ -151,7 +189,7 @@ downloadEnclosure relaxed cache enclosure
else
let (d, base) = splitFileName file
in d </> show n ++ "_" ++ base
tryanother = makeunique (n + 1) file
tryanother = makeunique url (n + 1) file
alreadyexists = liftIO $ isJust <$> catchMaybeIO (getSymbolicLinkStatus f)
checksameurl (k, _) = ifM (elem url <$> getUrls k)
( return Nothing
@ -163,8 +201,8 @@ defaultTemplate = "${feedtitle}/${itemtitle}${extension}"
{- Generates a filename to use for a feed item by filling out the template.
- The filename may not be unique. -}
feedFile :: Utility.Format.Format -> ToDownload -> FilePath
feedFile tmpl i = Utility.Format.format tmpl $ M.fromList
feedFile :: Utility.Format.Format -> ToDownload -> String -> FilePath
feedFile tmpl i extension = Utility.Format.format tmpl $ M.fromList
[ field "feedtitle" $ getFeedTitle $ feed i
, fieldMaybe "itemtitle" $ getItemTitle $ item i
, fieldMaybe "feedauthor" $ getFeedAuthor $ feed i
@ -173,7 +211,7 @@ feedFile tmpl i = Utility.Format.format tmpl $ M.fromList
, fieldMaybe "itemdescription" $ getItemDescription $ item i
, fieldMaybe "itemrights" $ getItemRights $ item i
, fieldMaybe "itemid" $ snd <$> getItemId (item i)
, ("extension", sanitizeFilePath $ takeExtension $ location i)
, ("extension", sanitizeFilePath extension)
]
where
field k v =

1
debian/changelog vendored
View file

@ -14,6 +14,7 @@ git-annex (5.20131222) UNRELEASED; urgency=medium
This also fixes a problem when a direct mode repo was somehow set to v3
rather than v4, and so the automatic direct mode upgrade to v5 was not
done.
* importfeed: Support youtube playlists.
-- Joey Hess <joeyh@debian.org> Tue, 24 Dec 2013 13:54:32 -0400

View file

@ -258,6 +258,10 @@ subdirectories).
(Other available variables: feedauthor, itemauthor, itemsummary, itemdescription, itemrights, itemid)
The `--relaxed` and `--fast` options behave the same as they do in addurl.
When quvi is installed, links in the feed are tested to see if they
are on a video hosting site, and the video is downloaded. This allows
importing eg, youtube playlists.
* `watch`

View file

@ -61,3 +61,9 @@ time-delayed deletion of upstream content. You can also work around slow
downloads upstream by podcatching to a server with ample bandwidth or work
around a slow local Internet connection by podcatching to your home server
and transferring to your laptop on demand.
## youtube playlists
If your git-annex is also built with quvi support, you can also use
`git annex importfeed` on youtube playlists. It will automatically download
the videos linked to by the playlist.

View file

@ -26,3 +26,5 @@ It would be great if this functionality could be integrated directly into git an
Best
Karsten
> [[done]] --[[Joey]]