move Feed and Item out of ToDownload

This is groundwork for producing ToDownload in other ways that may not
be entirely isomorphic with feeds, e.g. by using yt-dlp.
This commit is contained in:
Joey Hess 2024-01-30 14:00:56 -04:00
parent b7f02e4f89
commit d7949f8202
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38

View file

@ -190,10 +190,13 @@ parseFeedFromFile' f = catchMaybeIO (parseFeedFromFile f)
#endif #endif
data ToDownload = ToDownload data ToDownload = ToDownload
{ feed :: Feed { feedurl :: URLString
, feedurl :: URLString
, item :: Item
, location :: DownloadLocation , location :: DownloadLocation
, itemid :: Maybe B.ByteString
-- Either the parsed or unparsed date.
, itempubdate :: Maybe (Either String UTCTime)
-- Fields that are used as metadata and to generate the filename.
, itemfields :: [(String, String)]
} }
data DownloadLocation = Enclosure URLString | MediaLink URLString data DownloadLocation = Enclosure URLString | MediaLink URLString
@ -226,12 +229,24 @@ findDownloads u f = catMaybes $ map mk (feedItems f)
where where
mk i = case getItemEnclosure i of mk i = case getItemEnclosure i of
Just (enclosureurl, _, _) -> Just (enclosureurl, _, _) ->
Just $ ToDownload f u i $ Enclosure $ Just $ mk' i
decodeBS $ fromFeedText enclosureurl (Enclosure $ decodeBS $ fromFeedText enclosureurl)
Nothing -> case getItemLink i of Nothing -> case getItemLink i of
Just l -> Just $ ToDownload f u i $ Just l -> Just $ mk' i
MediaLink $ decodeBS $ fromFeedText l (MediaLink $ decodeBS $ fromFeedText l)
Nothing -> Nothing Nothing -> Nothing
mk' i l = ToDownload
{ feedurl = u
, location = l
, itemid = case getItemId i of
Just (_, iid) -> Just (fromFeedText iid)
_ -> Nothing
, itempubdate = case getItemPublishDate i :: Maybe (Maybe UTCTime) of
Just (Just d) -> Just (Right d)
_ -> Left . decodeBS . fromFeedText
<$> getItemPublishDateString i
, itemfields = extractFeedItemFields f i u
}
{- Feeds change, so a feed download cannot be resumed. -} {- Feeds change, so a feed download cannot be resumed. -}
downloadFeed :: URLString -> FilePath -> Annex Bool downloadFeed :: URLString -> FilePath -> Annex Bool
@ -261,13 +276,13 @@ startDownload addunlockedmatcher opts cache cv todownload = case location todown
checkknown url a = case dbhandle cache of checkknown url a = case dbhandle cache of
Just db -> ifM (liftIO $ Db.isKnownUrl db url) Just db -> ifM (liftIO $ Db.isKnownUrl db url)
( nothingtodo ( nothingtodo
, case getItemId (item todownload) of , case itemid todownload of
Just (_, itemid) -> Just iid ->
ifM (liftIO $ Db.isKnownItemId db (fromFeedText itemid)) ifM (liftIO $ Db.isKnownItemId db iid)
( nothingtodo ( nothingtodo
, a , a
) )
_ -> a Nothing -> a
) )
Nothing -> a Nothing -> a
@ -451,9 +466,9 @@ defaultTemplate = "${feedtitle}/${itemtitle}${extension}"
feedFile :: Utility.Format.Format -> ToDownload -> String -> FilePath feedFile :: Utility.Format.Format -> ToDownload -> String -> FilePath
feedFile tmpl i extension = sanitizeLeadingFilePathCharacter $ feedFile tmpl i extension = sanitizeLeadingFilePathCharacter $
Utility.Format.format tmpl $ Utility.Format.format tmpl $
M.map sanitizeFilePathComponent $ M.fromList $ extractFields i ++ M.map sanitizeFilePathComponent $ M.fromList $ itemfields i ++
[ ("extension", extension) [ ("extension", extension)
, extractField "itempubdate" [itempubdate] , extractField "itempubdate" [itempubdatestring]
, extractField "itempubyear" [itempubyear] , extractField "itempubyear" [itempubyear]
, extractField "itempubmonth" [itempubmonth] , extractField "itempubmonth" [itempubmonth]
, extractField "itempubday" [itempubday] , extractField "itempubday" [itempubday]
@ -462,18 +477,13 @@ feedFile tmpl i extension = sanitizeLeadingFilePathCharacter $
, extractField "itempubsecond" [itempubsecond] , extractField "itempubsecond" [itempubsecond]
] ]
where where
itm = item i pubdate = maybe Nothing eitherToMaybe (itempubdate i)
pubdate = case getItemPublishDate itm :: Maybe (Maybe UTCTime) of itempubdatestring = case itempubdate i of
Just (Just d) -> Just d Just (Right pd) -> Just $ formatTime defaultTimeLocale "%F" pd
_ -> Nothing
itempubdate = case pubdate of
Just pd -> Just $
formatTime defaultTimeLocale "%F" pd
-- if date cannot be parsed, use the raw string -- if date cannot be parsed, use the raw string
Nothing-> replace "/" "-" . decodeBS . fromFeedText Just (Left s) -> Just $ replace "/" "-" s
<$> getItemPublishDateString itm Nothing -> Nothing
(itempubyear, itempubmonth, itempubday) = case pubdate of (itempubyear, itempubmonth, itempubday) = case pubdate of
Nothing -> (Nothing, Nothing, Nothing) Nothing -> (Nothing, Nothing, Nothing)
@ -492,40 +502,38 @@ feedFile tmpl i extension = sanitizeLeadingFilePathCharacter $
) )
extractMetaData :: ToDownload -> MetaData extractMetaData :: ToDownload -> MetaData
extractMetaData i = case getItemPublishDate (item i) :: Maybe (Maybe UTCTime) of extractMetaData i = case itempubdate i of
Just (Just d) -> unionMetaData meta (dateMetaData d meta) Just (Right d) -> unionMetaData meta (dateMetaData d meta)
_ -> meta _ -> meta
where where
tometa (k, v) = (mkMetaFieldUnchecked (T.pack k), S.singleton (toMetaValue (encodeBS v))) tometa (k, v) = (mkMetaFieldUnchecked (T.pack k), S.singleton (toMetaValue (encodeBS v)))
meta = MetaData $ M.fromList $ map tometa $ extractFields i meta = MetaData $ M.fromList $ map tometa $ itemfields i
minimalMetaData :: ToDownload -> MetaData minimalMetaData :: ToDownload -> MetaData
minimalMetaData i = case getItemId (item i) of minimalMetaData i = case itemid i of
(Nothing) -> emptyMetaData Nothing -> emptyMetaData
(Just (_, itemid)) -> MetaData $ M.singleton itemIdField Just iid -> MetaData $ M.singleton itemIdField
(S.singleton $ toMetaValue $ fromFeedText itemid) (S.singleton $ toMetaValue iid)
{- Extract fields from the feed and item, that are both used as metadata, extractFeedItemFields :: Feed -> Item -> URLString -> [(String, String)]
- and to generate the filename. -} extractFeedItemFields f i u = map (uncurry extractField)
extractFields :: ToDownload -> [(String, String)] [ ("feedurl", [Just u])
extractFields i = map (uncurry extractField)
[ ("feedurl", [Just (feedurl i)])
, ("feedtitle", [feedtitle]) , ("feedtitle", [feedtitle])
, ("itemtitle", [itemtitle]) , ("itemtitle", [itemtitle])
, ("feedauthor", [feedauthor]) , ("feedauthor", [feedauthor])
, ("itemauthor", [itemauthor]) , ("itemauthor", [itemauthor])
, ("itemsummary", [decodeBS . fromFeedText <$> getItemSummary (item i)]) , ("itemsummary", [decodeBS . fromFeedText <$> getItemSummary i])
, ("itemdescription", [decodeBS . fromFeedText <$> getItemDescription (item i)]) , ("itemdescription", [decodeBS . fromFeedText <$> getItemDescription i])
, ("itemrights", [decodeBS . fromFeedText <$> getItemRights (item i)]) , ("itemrights", [decodeBS . fromFeedText <$> getItemRights i])
, ("itemid", [decodeBS . fromFeedText . snd <$> getItemId (item i)]) , ("itemid", [decodeBS . fromFeedText . snd <$> getItemId i])
, ("title", [itemtitle, feedtitle]) , ("title", [itemtitle, feedtitle])
, ("author", [itemauthor, feedauthor]) , ("author", [itemauthor, feedauthor])
] ]
where where
feedtitle = Just $ decodeBS $ fromFeedText $ getFeedTitle $ feed i feedtitle = Just $ decodeBS $ fromFeedText $ getFeedTitle f
itemtitle = decodeBS . fromFeedText <$> getItemTitle (item i) itemtitle = decodeBS . fromFeedText <$> getItemTitle i
feedauthor = decodeBS . fromFeedText <$> getFeedAuthor (feed i) feedauthor = decodeBS . fromFeedText <$> getFeedAuthor f
itemauthor = decodeBS . fromFeedText <$> getItemAuthor (item i) itemauthor = decodeBS . fromFeedText <$> getItemAuthor i
extractField :: String -> [Maybe String] -> (String, String) extractField :: String -> [Maybe String] -> (String, String)
extractField k [] = (k, noneValue) extractField k [] = (k, noneValue)