This commit is contained in:
Joey Hess 2017-11-28 17:17:40 -04:00
parent 53f91bddfa
commit 3febb79c8f
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 149 additions and 80 deletions

View file

@ -18,11 +18,11 @@ import Data.Char
-- Html fragments like "<p>this</p>" are not detected as being html,
-- although some browsers may choose to render them as html.
isHtml :: String -> Bool
isHtml = evaluate . canonicalizeTags . parseTags . truncate
isHtml = evaluate . canonicalizeTags . parseTags . shorten
where
-- We only care about the beginning of the file,
-- so although tagsoup parses lazily anyway, truncate it.
truncate = take 16384
shorten = take 16384
evaluate (TagOpen "!DOCTYPE" ((t, _):_):_) = map toLower t == "html"
evaluate (TagOpen "html" _:_) = True
-- Allow some leading whitespace before the tag.