-- this
-- " are not detected as being html,
-- although some browsers may choose to render them as html.
--
-- Only a bounded prefix of the input is inspected: the first tag that is
-- neither whitespace-only text nor a comment decides the result.
isHtml :: String -> Bool
isHtml input =
  case dropWhile skippable leadingTags of
    -- A doctype counts only when its first attribute is "html",
    -- compared case-insensitively.
    TagOpen "!DOCTYPE" ((doctypeName, _) : _) : _ ->
      map toLower doctypeName == "html"
    TagOpen "html" _ : _ -> True
    -- Anything else (including no tags at all, or non-blank text
    -- before the first tag) is not treated as html.
    _ -> False
  where
    -- Only the beginning of the file matters, so although tagsoup
    -- parses lazily anyway, cut the input down before parsing.
    leadingTags = canonicalizeTags (parseTags (take 16384 input))

    -- Leading whitespace before the tag is allowed. A html comment
    -- ahead of the html tag would be unusual, but is cheap to tolerate.
    skippable (TagText txt) = all isSpace txt
    skippable (TagComment _) = True
    skippable _ = False