git-annex/Annex/UntrustedFilePath.hs
Joey Hess a576fc3b12
fix mojibake reversion in display of utf8
When displaying a ByteString like "💕", safeOutput operates on
individual bytes like "\240\159\146\149" and isControl '\146' = True,
so it got truncated to just "\240".

So, only treat the low control characters, and DEL, as control
characters.

Also split Utility.Terminal out of Utility.SafeOutput. The latter needs
win32, but Utility.SafeOutput is used by Control.Exception, which is
used by Setup.

Sponsored-by: Nicholas Golder-Manning on Patreon
2023-04-12 13:53:30 -04:00

77 lines
2.5 KiB
Haskell

{- handling untrusted filepaths
-
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.UntrustedFilePath where
import Data.Char
import System.FilePath
import Utility.SafeOutput
{- Given a string that we'd like to use as the basis for FilePath, but that
- was provided by a third party and is not to be trusted, returns the closest
- sane FilePath.
-
- All spaces and punctuation and other wacky stuff are replaced
- with '_', except for '.' and '-'
-
- "../" becomes ".._", which is safe.
- "/foo" becomes "_foo", which is safe.
- "c:foo" becomes "c_foo", which is safe even on windows.
-
- Leading '.' and '-' are also replaced with '_', so
- so no dotfiles that might control a program are inadvertently created,
- and to avoid filenames being treated as options to commands the user
- might run.
-
- Also there's an off chance the string might be empty, so to avoid
- needing to handle such an invalid filepath, return a dummy "file" in
- that case.
-}
sanitizeFilePath :: String -> FilePath
sanitizeFilePath = sanitizeLeadingFilePathCharacter . sanitizeFilePathComponent
{- For when the filepath is being built up out of components that should be
- individually sanitized, this can be used for each component, followed by
- sanitizeLeadingFilePathCharacter for the whole thing.
-}
sanitizeFilePathComponent :: String -> String
sanitizeFilePathComponent = map sanitize
where
sanitize c
| c == '.' || c == '-' = c
| isSpace c || isPunctuation c || isSymbol c || isControl c || c == '/' = '_'
| otherwise = c
sanitizeLeadingFilePathCharacter :: String -> FilePath
sanitizeLeadingFilePathCharacter [] = "file"
sanitizeLeadingFilePathCharacter ('.':s) = '_':s
sanitizeLeadingFilePathCharacter ('-':s) = '_':s
sanitizeLeadingFilePathCharacter ('/':s) = '_':s
sanitizeLeadingFilePathCharacter s = s
controlCharacterInFilePath :: FilePath -> Bool
controlCharacterInFilePath = any (not . safechar)
where
safechar c = safeOutputChar c && c /= '\n'
{- ../ is a path traversal, no matter where it appears.
-
- An absolute path is, of course.
-}
pathTraversalInFilePath :: FilePath -> Bool
pathTraversalInFilePath f
| isAbsolute f = True
| any (== "..") (splitPath f) = True
-- On windows, C:foo with no directory is not considered absolute
| hasDrive f = True
| otherwise = False
gitDirectoryInFilePath :: FilePath -> Bool
gitDirectoryInFilePath = any (== ".git")
. map dropTrailingPathSeparator
. splitPath