importfeed: Ignores transient problems with feeds. Only exits nonzero when a feed has repeatedly had a problems for at least 1 day.
This commit is contained in:
parent
17f05d6572
commit
24c8a6042b
3 changed files with 79 additions and 20 deletions
|
@ -13,6 +13,7 @@ import Text.Feed.Types
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
import Data.Char
|
import Data.Char
|
||||||
|
import Data.Time.Clock
|
||||||
|
|
||||||
import Common.Annex
|
import Common.Annex
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
|
@ -23,6 +24,8 @@ import qualified Option
|
||||||
import qualified Utility.Format
|
import qualified Utility.Format
|
||||||
import Utility.Tmp
|
import Utility.Tmp
|
||||||
import Command.AddUrl (addUrlFile, relaxedOption)
|
import Command.AddUrl (addUrlFile, relaxedOption)
|
||||||
|
import Annex.Perms
|
||||||
|
import Backend.URL (fromUrl)
|
||||||
|
|
||||||
def :: [Command]
|
def :: [Command]
|
||||||
def = [notBareRepo $ withOptions [templateOption, relaxedOption] $
|
def = [notBareRepo $ withOptions [templateOption, relaxedOption] $
|
||||||
|
@ -48,20 +51,30 @@ perform relaxed cache url = do
|
||||||
v <- findEnclosures url
|
v <- findEnclosures url
|
||||||
case v of
|
case v of
|
||||||
Just l | not (null l) -> do
|
Just l | not (null l) -> do
|
||||||
mapM_ (downloadEnclosure relaxed cache) l
|
ok <- all id
|
||||||
|
<$> mapM (downloadEnclosure relaxed cache) l
|
||||||
|
next $ cleanup url ok
|
||||||
|
_ -> do
|
||||||
|
feedProblem url "bad feed content"
|
||||||
next $ return True
|
next $ return True
|
||||||
_ -> stop
|
|
||||||
|
cleanup :: URLString -> Bool -> CommandCleanup
|
||||||
|
cleanup url ok = do
|
||||||
|
when ok $
|
||||||
|
clearFeedProblem url
|
||||||
|
return ok
|
||||||
|
|
||||||
data ToDownload = ToDownload
|
data ToDownload = ToDownload
|
||||||
{ feed :: Feed
|
{ feed :: Feed
|
||||||
|
, feedurl :: URLString
|
||||||
, item :: Item
|
, item :: Item
|
||||||
, location :: URLString
|
, location :: URLString
|
||||||
}
|
}
|
||||||
|
|
||||||
mkToDownload :: Feed -> Item -> Maybe ToDownload
|
mkToDownload :: Feed -> URLString -> Item -> Maybe ToDownload
|
||||||
mkToDownload f i = case getItemEnclosure i of
|
mkToDownload f u i = case getItemEnclosure i of
|
||||||
Nothing -> Nothing
|
Nothing -> Nothing
|
||||||
Just (enclosureurl, _, _) -> Just $ ToDownload f i enclosureurl
|
Just (enclosureurl, _, _) -> Just $ ToDownload f u i enclosureurl
|
||||||
|
|
||||||
data Cache = Cache
|
data Cache = Cache
|
||||||
{ knownurls :: S.Set URLString
|
{ knownurls :: S.Set URLString
|
||||||
|
@ -80,13 +93,10 @@ getCache opttemplate = ifM (Annex.getState Annex.force)
|
||||||
ret s = return $ Cache s tmpl
|
ret s = return $ Cache s tmpl
|
||||||
|
|
||||||
findEnclosures :: URLString -> Annex (Maybe [ToDownload])
|
findEnclosures :: URLString -> Annex (Maybe [ToDownload])
|
||||||
findEnclosures url = go =<< downloadFeed url
|
findEnclosures url = extract <$> downloadFeed url
|
||||||
where
|
where
|
||||||
go Nothing = do
|
extract Nothing = Nothing
|
||||||
warning $ "failed to parse feed " ++ url
|
extract (Just f) = Just $ mapMaybe (mkToDownload f url) (feedItems f)
|
||||||
return Nothing
|
|
||||||
go (Just f) = return $ Just $
|
|
||||||
mapMaybe (mkToDownload f) (feedItems f)
|
|
||||||
|
|
||||||
{- Feeds change, so a feed download cannot be resumed. -}
|
{- Feeds change, so a feed download cannot be resumed. -}
|
||||||
downloadFeed :: URLString -> Annex (Maybe Feed)
|
downloadFeed :: URLString -> Annex (Maybe Feed)
|
||||||
|
@ -95,16 +105,15 @@ downloadFeed url = do
|
||||||
liftIO $ withTmpFile "feed" $ \f h -> do
|
liftIO $ withTmpFile "feed" $ \f h -> do
|
||||||
fileEncoding h
|
fileEncoding h
|
||||||
ifM (Url.download url [] [] f)
|
ifM (Url.download url [] [] f)
|
||||||
( parseFeedString <$> hGetContentsStrict h
|
( liftIO $ parseFeedString <$> hGetContentsStrict h
|
||||||
, return Nothing
|
, return Nothing
|
||||||
)
|
)
|
||||||
|
|
||||||
{- Avoids downloading any urls that are already known to be associated
|
{- Avoids downloading any urls that are already known to be associated
|
||||||
- with a file in the annex, unless forced. -}
|
- with a file in the annex, unless forced. -}
|
||||||
downloadEnclosure :: Bool -> Cache -> ToDownload -> Annex ()
|
downloadEnclosure :: Bool -> Cache -> ToDownload -> Annex Bool
|
||||||
downloadEnclosure relaxed cache enclosure
|
downloadEnclosure relaxed cache enclosure
|
||||||
| S.member url (knownurls cache) =
|
| S.member url (knownurls cache) = ifM forced (go, return True)
|
||||||
whenM forced go
|
|
||||||
| otherwise = go
|
| otherwise = go
|
||||||
where
|
where
|
||||||
forced = Annex.getState Annex.force
|
forced = Annex.getState Annex.force
|
||||||
|
@ -112,13 +121,17 @@ downloadEnclosure relaxed cache enclosure
|
||||||
go = do
|
go = do
|
||||||
dest <- makeunique (1 :: Integer) $ feedFile (template cache) enclosure
|
dest <- makeunique (1 :: Integer) $ feedFile (template cache) enclosure
|
||||||
case dest of
|
case dest of
|
||||||
Nothing -> noop
|
Nothing -> return True
|
||||||
Just f -> do
|
Just f -> do
|
||||||
showStart "addurl" f
|
showStart "addurl" f
|
||||||
ifM (addUrlFile relaxed url f)
|
ok <- addUrlFile relaxed url f
|
||||||
( showEndOk
|
if ok
|
||||||
, showEndFail
|
then do
|
||||||
)
|
showEndOk
|
||||||
|
return True
|
||||||
|
else do
|
||||||
|
showEndFail
|
||||||
|
checkFeedBroken (feedurl enclosure)
|
||||||
{- Find a unique filename to save the url to.
|
{- Find a unique filename to save the url to.
|
||||||
- If the file exists, prefixes it with a number.
|
- If the file exists, prefixes it with a number.
|
||||||
- When forced, the file may already exist and have the same
|
- When forced, the file may already exist and have the same
|
||||||
|
@ -171,3 +184,38 @@ feedFile tmpl i = Utility.Format.format tmpl $ M.fromList
|
||||||
sanitize c
|
sanitize c
|
||||||
| isSpace c || isPunctuation c || c == '/' = '_'
|
| isSpace c || isPunctuation c || c == '/' = '_'
|
||||||
| otherwise = c
|
| otherwise = c
|
||||||
|
|
||||||
|
{- Called when there is a problem with a feed.
|
||||||
|
- Throws an error if the feed is broken, otherwise shows a warning. -}
|
||||||
|
feedProblem :: URLString -> String -> Annex ()
|
||||||
|
feedProblem url message = ifM (checkFeedBroken url)
|
||||||
|
( error $ message ++ " (having repeated problems with this feed!)"
|
||||||
|
, warning $ "warning: " ++ message
|
||||||
|
)
|
||||||
|
|
||||||
|
{- A feed is only broken if problems have occurred repeatedly, for at
|
||||||
|
- least 23 hours. -}
|
||||||
|
checkFeedBroken :: URLString -> Annex Bool
|
||||||
|
checkFeedBroken url = checkFeedBroken' url =<< feedState url
|
||||||
|
checkFeedBroken' :: URLString -> FilePath -> Annex Bool
|
||||||
|
checkFeedBroken' url f = do
|
||||||
|
prev <- maybe Nothing readish <$> liftIO (catchMaybeIO $ readFile f)
|
||||||
|
now <- liftIO getCurrentTime
|
||||||
|
case prev of
|
||||||
|
Nothing -> do
|
||||||
|
createAnnexDirectory (parentDir f)
|
||||||
|
liftIO $ writeFile f $ show now
|
||||||
|
return False
|
||||||
|
Just prevtime -> do
|
||||||
|
let broken = diffUTCTime now prevtime > 60 * 60 * 23
|
||||||
|
when broken $
|
||||||
|
-- Avoid repeatedly complaining about
|
||||||
|
-- broken feed.
|
||||||
|
clearFeedProblem url
|
||||||
|
return broken
|
||||||
|
|
||||||
|
clearFeedProblem :: URLString -> Annex ()
|
||||||
|
clearFeedProblem url = void $ liftIO . tryIO . removeFile =<< feedState url
|
||||||
|
|
||||||
|
feedState :: URLString -> Annex FilePath
|
||||||
|
feedState url = fromRepo . gitAnnexFeedState =<< fromUrl url Nothing
|
||||||
|
|
|
@ -28,6 +28,8 @@ module Locations (
|
||||||
gitAnnexFsckState,
|
gitAnnexFsckState,
|
||||||
gitAnnexTransferDir,
|
gitAnnexTransferDir,
|
||||||
gitAnnexCredsDir,
|
gitAnnexCredsDir,
|
||||||
|
gitAnnexFeedStateDir,
|
||||||
|
gitAnnexFeedState,
|
||||||
gitAnnexMergeDir,
|
gitAnnexMergeDir,
|
||||||
gitAnnexJournalDir,
|
gitAnnexJournalDir,
|
||||||
gitAnnexJournalLock,
|
gitAnnexJournalLock,
|
||||||
|
@ -190,6 +192,13 @@ gitAnnexFsckState r = gitAnnexDir r </> "fsckstate"
|
||||||
gitAnnexCredsDir :: Git.Repo -> FilePath
|
gitAnnexCredsDir :: Git.Repo -> FilePath
|
||||||
gitAnnexCredsDir r = addTrailingPathSeparator $ gitAnnexDir r </> "creds"
|
gitAnnexCredsDir r = addTrailingPathSeparator $ gitAnnexDir r </> "creds"
|
||||||
|
|
||||||
|
{- .git/annex/feeds/ is used to record per-key (url) state by importfeeds -}
|
||||||
|
gitAnnexFeedStateDir :: Git.Repo -> FilePath
|
||||||
|
gitAnnexFeedStateDir r = addTrailingPathSeparator $ gitAnnexDir r </> "feedstate"
|
||||||
|
|
||||||
|
gitAnnexFeedState :: Key -> Git.Repo -> FilePath
|
||||||
|
gitAnnexFeedState k r = gitAnnexFeedStateDir r </> keyFile k
|
||||||
|
|
||||||
{- .git/annex/merge/ is used for direct mode merges. -}
|
{- .git/annex/merge/ is used for direct mode merges. -}
|
||||||
gitAnnexMergeDir :: Git.Repo -> FilePath
|
gitAnnexMergeDir :: Git.Repo -> FilePath
|
||||||
gitAnnexMergeDir r = addTrailingPathSeparator $ gitAnnexDir r </> "merge"
|
gitAnnexMergeDir r = addTrailingPathSeparator $ gitAnnexDir r </> "merge"
|
||||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -3,6 +3,8 @@ git-annex (4.20130803) UNRELEASED; urgency=low
|
||||||
* assistant, watcher: .gitignore files and other git ignores are now
|
* assistant, watcher: .gitignore files and other git ignores are now
|
||||||
honored, when git 1.8.4 or newer is installed.
|
honored, when git 1.8.4 or newer is installed.
|
||||||
(Thanks, Adam Spiers, for getting the necessary support into git for this.)
|
(Thanks, Adam Spiers, for getting the necessary support into git for this.)
|
||||||
|
* importfeed: Ignores transient problems with feeds. Only exits nonzero
|
||||||
|
when a feed has repeatedly had a problems for at least 1 day.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Fri, 02 Aug 2013 19:26:20 -0400
|
-- Joey Hess <joeyh@debian.org> Fri, 02 Aug 2013 19:26:20 -0400
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue