From 332385a1174b056be45758e2e978815c4e9b9c66 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Mon, 15 Nov 2021 15:31:02 -0400
Subject: [PATCH] use parseFeedFromFile to avoid mojibake

As mentioned in commit 2bd778a46ed071c2dd534ebe3b7007b7ae60d1c1, there
was mojibake when LANG=C.

Looking at parseFeedFromFile, it is very particular to read the file as
unicode. parseFeedString looks like it will accept any old String, but a
String that was read using the filesystem encoding will not in fact have
the right encoding. I think this is a bug in the feed library and will
file one.

Sponsored-by: Svenne Krap on Patreon
---
 Command/ImportFeed.hs | 49 +++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs
index 922e354c21..6eda9afba4 100644
--- a/Command/ImportFeed.hs
+++ b/Command/ImportFeed.hs
@@ -78,22 +78,29 @@ seek o = do
 getFeed :: AddUnlockedMatcher -> ImportFeedOptions -> Cache -> URLString -> CommandSeek
 getFeed addunlockedmatcher opts cache url = do
 	showStartOther "importfeed" (Just url) (SeekInput [])
-	downloadFeed url >>= \case
-		Nothing -> showEndResult =<< feedProblem url
-			"downloading the feed failed"
-		Just feedcontent -> case parseFeedString feedcontent of
-			Nothing -> debugfeedcontent feedcontent "parsing the feed failed"
-			Just f -> case findDownloads url f of
-				[] -> debugfeedcontent feedcontent "bad feed content; no enclosures to download"
-				l -> do
-					showEndOk
-					ifM (and <$> mapM (performDownload addunlockedmatcher opts cache) l)
-						( clearFeedProblem url
-						, void $ feedProblem url
-							"problem downloading some item(s) from feed"
-						)
+	withTmpFile "feed" $ \tmpf h -> do
+		liftIO $ hClose h
+		ifM (downloadFeed url tmpf)
+			( go tmpf
+			, showEndResult =<< feedProblem url
+				"downloading the feed failed"
+			)
   where
-	debugfeedcontent feedcontent msg = do
+	-- Use parseFeedFromFile rather than reading the file
+	-- ourselves because it goes out of its way to handle encodings.
+	go tmpf = liftIO (parseFeedFromFile tmpf) >>= \case
+		Nothing -> debugfeedcontent tmpf "parsing the feed failed"
+		Just f -> case findDownloads url f of
+			[] -> debugfeedcontent tmpf "bad feed content; no enclosures to download"
+			l -> do
+				showEndOk
+				ifM (and <$> mapM (performDownload addunlockedmatcher opts cache) l)
+					( clearFeedProblem url
+					, void $ feedProblem url
+						"problem downloading some item(s) from feed"
+					)
+	debugfeedcontent tmpf msg = do
+		feedcontent <- liftIO $ readFile tmpf
 		fastDebug "Command.ImportFeed" $ unlines
 			[ "start of feed content"
 			, feedcontent
@@ -170,15 +177,11 @@ findDownloads u f = catMaybes $ map mk (feedItems f)
 			Nothing -> Nothing
 
 {- Feeds change, so a feed download cannot be resumed. -}
-downloadFeed :: URLString -> Annex (Maybe String)
-downloadFeed url
+downloadFeed :: URLString -> FilePath -> Annex Bool
+downloadFeed url f
 	| Url.parseURIRelaxed url == Nothing = giveup "invalid feed url"
-	| otherwise = withTmpFile "feed" $ \f h -> do
-		liftIO $ hClose h
-		ifM (Url.withUrlOptions $ Url.download nullMeterUpdate Nothing url f)
-			( Just <$> liftIO (readFileStrict f)
-			, return Nothing
-			)
+	| otherwise = Url.withUrlOptions $
+		Url.download nullMeterUpdate Nothing url f
 
 performDownload :: AddUnlockedMatcher -> ImportFeedOptions -> Cache -> ToDownload -> Annex Bool
 performDownload addunlockedmatcher opts cache todownload = case location todownload of