addurl: Avoid crashing when used on beegfs.

Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
Joey Hess 2021-07-05 13:02:40 -04:00
parent 17f8682d19
commit b9db859221
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 67 additions and 2 deletions

View file

@ -1,6 +1,6 @@
{- html detection
-
- Copyright 2017 Joey Hess <id@joeyh.name>
- Copyright 2017-2021 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
@ -8,10 +8,12 @@
module Utility.HtmlDetect (
isHtml,
isHtmlBs,
isHtmlFile,
htmlPrefixLength,
) where
import Text.HTML.TagSoup
import System.IO
import Data.Char
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as B8
@ -44,6 +46,15 @@ isHtmlBs :: B.ByteString -> Bool
-- looks for ascii strings.
isHtmlBs = isHtml . B8.unpack
-- | Check if the file is html.
--
-- It would be equivilant to use isHtml <$> readFile file,
-- but since that would not read all of the file, the handle
-- would remain open until it got garbage collected sometime later.
isHtmlFile :: FilePath -> IO Bool
isHtmlFile file = withFile file ReadMode $ \h ->
isHtmlBs <$> B.hGet h htmlPrefixLength
-- | How much of the beginning of a html document is needed to detect it.
-- (conservatively)
htmlPrefixLength :: Int