2013-07-28 19:27:36 +00:00
|
|
|
{- git-annex command
|
|
|
|
-
|
|
|
|
- Copyright 2013 Joey Hess <joey@kitenet.net>
|
|
|
|
-
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
module Command.ImportFeed where
|
|
|
|
|
|
|
|
import Text.Feed.Import
|
|
|
|
import Text.Feed.Query
|
|
|
|
import Text.Feed.Types
|
|
|
|
import qualified Data.Set as S
|
|
|
|
import qualified Data.Map as M
|
|
|
|
import Data.Char
|
|
|
|
|
|
|
|
import Common.Annex
|
2013-07-31 16:19:00 +00:00
|
|
|
import qualified Annex
|
2013-07-28 19:27:36 +00:00
|
|
|
import Command
|
|
|
|
import qualified Utility.Url as Url
|
|
|
|
import Logs.Web
|
|
|
|
import qualified Option
|
|
|
|
import qualified Utility.Format
|
|
|
|
import Utility.Tmp
|
|
|
|
import Command.AddUrl (addUrlFile, relaxedOption)
|
|
|
|
|
|
|
|
{- The "importfeed" command. It takes any number of urls, accepts the
 - template and relaxed options, and is wrapped in notBareRepo. -}
def :: [Command]
def = [notBareRepo (withOptions opts cmd)]
  where
    opts = [templateOption, relaxedOption]
    cmd = command "importfeed" (paramRepeating paramUrl) seek
        SectionCommon "import files from podcast feeds"
|
|
|
|
|
|
|
|
{- The --template option, whose value is a format string used to name
 - the imported files. It has no short form. -}
templateOption :: Option
templateOption = Option.field shortnames "template" paramFormat description
  where
    shortnames = []
    description = "template for filenames"
|
|
|
|
|
|
|
|
{- Reads the template option and the relaxed flag, builds the Cache once,
 - and then starts an import for each url given on the command line. -}
seek :: [CommandSeek]
seek = [withField templateOption return withTemplate]
  where
    withTemplate tmpl = withFlag relaxedOption (withRelaxed tmpl)
    withRelaxed tmpl relaxed = withValue (getCache tmpl) (withCache relaxed)
    withCache relaxed cache = withStrings (start relaxed cache)
|
2013-07-28 19:27:36 +00:00
|
|
|
|
2013-07-28 22:16:24 +00:00
|
|
|
{- Announces the feed url being imported, then hands over to perform. -}
start :: Bool -> Cache -> URLString -> CommandStart
start relaxed cache url =
    showStart "importfeed" url >> next (perform relaxed cache url)
|
|
|
|
|
|
|
|
{- Downloads every enclosure found in the feed.
 -
 - Stops when the feed could not be parsed, or when it contains no
 - enclosures at all. -}
perform :: Bool -> Cache -> URLString -> CommandPerform
perform relaxed cache url = findEnclosures url >>= go
  where
    -- Only a non-empty list of enclosures is worth processing.
    go (Just enclosures@(_:_)) = do
        mapM_ (downloadEnclosure relaxed cache) enclosures
        next $ return True
    go _ = stop
|
|
|
|
|
2013-07-28 23:08:50 +00:00
|
|
|
{- An enclosure that may need to be downloaded, along with the feed and
 - feed item it was found in. -}
data ToDownload = ToDownload
    { feed :: Feed
    -- ^ the feed containing the item
    , item :: Item
    -- ^ the item that has the enclosure
    , location :: URLString
    -- ^ url of the enclosure
    }
|
|
|
|
|
|
|
|
{- Builds a ToDownload for a feed item. Items that have no enclosure
 - yield Nothing. Only the url of the enclosure is used; the other two
 - components returned by getItemEnclosure are ignored. -}
mkToDownload :: Feed -> Item -> Maybe ToDownload
mkToDownload f i = do
    (enclosureurl, _, _) <- getItemEnclosure i
    return (ToDownload f i enclosureurl)
|
|
|
|
|
2013-07-28 22:16:24 +00:00
|
|
|
{- Values that are calculated once and shared by all the feeds imported
 - in a run. -}
data Cache = Cache
    { knownurls :: S.Set URLString
    -- ^ urls that should not be downloaded again
    , template :: Utility.Format.Format
    -- ^ format used to generate filenames
    }
|
|
|
|
|
|
|
|
{- Builds the Cache, using the given template, or the default template
 - when none was specified.
 -
 - When forced, the set of known urls is left empty, and so the known
 - urls are not looked up at all. -}
getCache :: Maybe String -> Annex Cache
getCache opttemplate = do
    forced <- Annex.getState Annex.force
    if forced
        then mkcache S.empty
        else do
            showSideAction "checking known urls"
            knownUrls >>= mkcache . S.fromList
  where
    mkcache urls = return $ Cache urls fmt
    fmt = Utility.Format.gen $ fromMaybe defaultTemplate opttemplate
|
2013-07-28 19:27:36 +00:00
|
|
|
|
|
|
|
{- Downloads the feed and collects the enclosures of its items.
 -
 - Returns Nothing, after displaying a warning, when no feed could be
 - obtained from the url. Items without an enclosure are left out of the
 - list. -}
findEnclosures :: URLString -> Annex (Maybe [ToDownload])
findEnclosures url = downloadFeed url >>= maybe parsefailed enclosures
  where
    parsefailed = do
        warning $ "failed to parse feed " ++ url
        return Nothing
    enclosures f = return $ Just $ mapMaybe (mkToDownload f) (feedItems f)
|
|
|
|
|
|
|
|
{- Downloads the feed to a temporary file, and parses it.
 -
 - The content of a feed changes over time, so there is no point in
 - trying to resume an earlier download of it; each call starts over
 - with a fresh temporary file.
 -
 - Returns Nothing when the download fails, or when the downloaded
 - content cannot be parsed as a feed. -}
downloadFeed :: URLString -> Annex (Maybe Feed)
downloadFeed url = do
    showOutput
    liftIO $ withTmpFile "feed" fetch
  where
    fetch tmpfile h = do
        fileEncoding h
        ok <- Url.download url [] [] tmpfile
        if ok
            then parseFeedString <$> hGetContentsStrict h
            else return Nothing
|
|
|
|
|
|
|
|
{- Adds the url of an enclosure to the annex, using a filename generated
 - from the template.
 -
 - An url that is in the cache's set of known urls is skipped, unless
 - forced. -}
downloadEnclosure :: Bool -> Cache -> ToDownload -> Annex ()
downloadEnclosure relaxed cache enclosure =
    if alreadyknown
        then whenM (Annex.getState Annex.force) addit
        else addit
  where
    enclosureurl = location enclosure
    alreadyknown = enclosureurl `S.member` knownurls cache
    addit = do
        dest <- liftIO $ feedFile (template cache) enclosure
        showStart "addurl" dest
        ok <- addUrlFile relaxed enclosureurl dest
        if ok
            then showEndOk
            else showEndFail
|
2013-07-28 22:16:24 +00:00
|
|
|
|
|
|
|
{- The filename template that is used when the template option is not
 - specified: A directory named after the feed, containing files named
 - after the item, followed by the extension of the enclosure. -}
defaultTemplate :: String
defaultTemplate = "${feedtitle}/${itemtitle}${extension}"
|
2013-07-28 19:27:36 +00:00
|
|
|
|
|
|
|
{- Generate a unique filename for the feed item by filling
 - out the template.
 -
 - Since each feed url is only downloaded once,
 - if the file already exists, two items with different urls
 - are conflicting. A number is added to disambiguate.
 -
 - Whitespace and punctuation in the values of fields are replaced
 - with '_', and fields that are missing or empty get the value "none".
 - The extension field is the exception: The '.' it starts with is
 - preserved, so the default template generates "title.mp3" and not
 - "title_mp3".
 -}
feedFile :: Utility.Format.Format -> ToDownload -> IO FilePath
feedFile tmpl i = makeUnique 1 $
    Utility.Format.format tmpl $ M.fromList
        [ field "feedtitle" $ getFeedTitle $ feed i
        , fieldMaybe "itemtitle" $ getItemTitle $ item i
        , fieldMaybe "feedauthor" $ getFeedAuthor $ feed i
        , fieldMaybe "itemauthor" $ getItemAuthor $ item i
        , fieldMaybe "itemsummary" $ getItemSummary $ item i
        , fieldMaybe "itemdescription" $ getItemDescription $ item i
        , fieldMaybe "itemrights" $ getItemRights $ item i
        , fieldMaybe "itemid" $ snd <$> getItemId (item i)
        , ("extension", extension)
        ]
  where
    field k v =
        let s = map sanitize v in
        if null s then (k, "none") else (k, s)
    fieldMaybe k Nothing = (k, "none")
    fieldMaybe k (Just v) = field k v

    -- takeExtension returns either "" or a string that starts with
    -- '.'. Since '.' is punctuation, running it through sanitize would
    -- turn ".mp3" into "_mp3", so the leading '.' is kept as-is and
    -- only the rest of the extension is sanitized.
    extension = case takeExtension (location i) of
        ('.':rest) -> '.' : map sanitize rest
        other -> map sanitize other

    -- A '/' must never end up in a field's value, since it would
    -- add a directory to the filename.
    sanitize c
        | isSpace c || isPunctuation c || c == '/' = '_'
        | otherwise = c
|
|
|
|
|
|
|
|
{- Finds a filename that is not in use yet.
 -
 - The file is tried unchanged first (when called with a number below 2).
 - After that, increasing numbers are prefixed to the name of the file,
 - within its directory, until a name is found for which
 - getSymbolicLinkStatus fails. -}
makeUnique :: Integer -> FilePath -> IO FilePath
makeUnique n file = do
    taken <- isJust <$> catchMaybeIO (getSymbolicLinkStatus candidate)
    if taken
        then makeUnique (n + 1) file
        else return candidate
  where
    candidate
        | n < 2 = file
        | otherwise = dir </> (show n ++ "_" ++ name)
    (dir, name) = splitFileName file
|