323 lines
		
	
	
	
		
			10 KiB
			
		
	
	
	
		
			Haskell
		
	
	
	
	
	
			
		
		
	
	
			323 lines
		
	
	
	
		
			10 KiB
			
		
	
	
	
		
			Haskell
		
	
	
	
	
	
{- path manipulation
 | 
						|
 -
 | 
						|
 - Copyright 2010-2014 Joey Hess <id@joeyh.name>
 | 
						|
 -
 | 
						|
 - License: BSD-2-clause
 | 
						|
 -}
 | 
						|
 | 
						|
{-# LANGUAGE PackageImports, CPP #-}
 | 
						|
{-# OPTIONS_GHC -fno-warn-tabs #-}
 | 
						|
 | 
						|
module Utility.Path where
 | 
						|
 | 
						|
import System.FilePath
 | 
						|
import Data.List
 | 
						|
import Data.Maybe
 | 
						|
import Data.Char
 | 
						|
import Control.Applicative
 | 
						|
import Prelude
 | 
						|
 | 
						|
#ifdef mingw32_HOST_OS
 | 
						|
import qualified System.FilePath.Posix as Posix
 | 
						|
#else
 | 
						|
import System.Posix.Files
 | 
						|
import Utility.Exception
 | 
						|
#endif
 | 
						|
 | 
						|
import Utility.Monad
 | 
						|
import Utility.UserInfo
 | 
						|
import Utility.Directory
 | 
						|
import Utility.Split
 | 
						|
 | 
						|
{- Simplifies a path, removing any "." component, collapsing "dir/..", 
 | 
						|
 - and removing the trailing path separator.
 | 
						|
 -
 | 
						|
 - On Windows, preserves whichever style of path separator might be used in
 | 
						|
 - the input FilePaths. This is done because some programs in Windows
 | 
						|
 - demand a particular path separator -- and which one actually varies!
 | 
						|
 -
 | 
						|
 - This does not guarantee that two paths that refer to the same location,
 | 
						|
 - and are both relative to the same location (or both absolute) will
 | 
						|
 - yeild the same result. Run both through normalise from System.FilePath
 | 
						|
 - to ensure that.
 | 
						|
 -}
 | 
						|
simplifyPath :: FilePath -> FilePath
 | 
						|
simplifyPath path = dropTrailingPathSeparator $ 
 | 
						|
	joinDrive drive $ joinPath $ norm [] $ splitPath path'
 | 
						|
  where
 | 
						|
	(drive, path') = splitDrive path
 | 
						|
 | 
						|
	norm c [] = reverse c
 | 
						|
	norm c (p:ps)
 | 
						|
		| p' == ".." && not (null c) && dropTrailingPathSeparator (c !! 0) /= ".." = 
 | 
						|
			norm (drop 1 c) ps
 | 
						|
		| p' == "." = norm c ps
 | 
						|
		| otherwise = norm (p:c) ps
 | 
						|
	  where
 | 
						|
		p' = dropTrailingPathSeparator p
 | 
						|
 | 
						|
{- Makes a path absolute.
 | 
						|
 -
 | 
						|
 - The first parameter is a base directory (ie, the cwd) to use if the path
 | 
						|
 - is not already absolute, and should itsef be absolute.
 | 
						|
 -
 | 
						|
 - Does not attempt to deal with edge cases or ensure security with
 | 
						|
 - untrusted inputs.
 | 
						|
 -}
 | 
						|
absPathFrom :: FilePath -> FilePath -> FilePath
 | 
						|
absPathFrom dir path = simplifyPath (combine dir path)
 | 
						|
 | 
						|
{- takeDirectory "foo/bar/" is "foo/bar". This instead yields "foo" -}
 | 
						|
parentDir :: FilePath -> FilePath
 | 
						|
parentDir = takeDirectory . dropTrailingPathSeparator
 | 
						|
 | 
						|
{- Just the parent directory of a path, or Nothing if the path has no
 | 
						|
- parent (ie for "/" or ".") -}
 | 
						|
upFrom :: FilePath -> Maybe FilePath
 | 
						|
upFrom dir
 | 
						|
	| length dirs < 2 = Nothing
 | 
						|
	| otherwise = Just $ joinDrive drive $ intercalate s $ init dirs
 | 
						|
  where
 | 
						|
	-- on Unix, the drive will be "/" when the dir is absolute,
 | 
						|
	-- otherwise ""
 | 
						|
	(drive, path) = splitDrive dir
 | 
						|
	s = [pathSeparator]
 | 
						|
	dirs = filter (not . null) $ split s path
 | 
						|
 | 
						|
prop_upFrom_basics :: FilePath -> Bool
 | 
						|
prop_upFrom_basics dir
 | 
						|
	| null dir = True
 | 
						|
	| dir == "/" = p == Nothing
 | 
						|
	| otherwise = p /= Just dir
 | 
						|
  where
 | 
						|
	p = upFrom dir
 | 
						|
 | 
						|
{- Checks if the first FilePath is, or could be said to contain the second.
 | 
						|
 - For example, "foo/" contains "foo/bar". Also, "foo", "./foo", "foo/" etc
 | 
						|
 - are all equivilant.
 | 
						|
 -}
 | 
						|
dirContains :: FilePath -> FilePath -> Bool
 | 
						|
dirContains a b = a == b || a' == b' || (addTrailingPathSeparator a') `isPrefixOf` b'
 | 
						|
  where
 | 
						|
	a' = norm a
 | 
						|
	b' = norm b
 | 
						|
	norm = normalise . simplifyPath
 | 
						|
 | 
						|
{- Converts a filename into an absolute path.
 | 
						|
 -
 | 
						|
 - Unlike Directory.canonicalizePath, this does not require the path
 | 
						|
 - already exists. -}
 | 
						|
absPath :: FilePath -> IO FilePath
 | 
						|
absPath file = do
 | 
						|
	cwd <- getCurrentDirectory
 | 
						|
	return $ absPathFrom cwd file
 | 
						|
 | 
						|
{- Constructs a relative path from the CWD to a file.
 | 
						|
 -
 | 
						|
 - For example, assuming CWD is /tmp/foo/bar:
 | 
						|
 -    relPathCwdToFile "/tmp/foo" == ".."
 | 
						|
 -    relPathCwdToFile "/tmp/foo/bar" == "" 
 | 
						|
 -}
 | 
						|
relPathCwdToFile :: FilePath -> IO FilePath
 | 
						|
relPathCwdToFile f = do
 | 
						|
	c <- getCurrentDirectory
 | 
						|
	relPathDirToFile c f
 | 
						|
 | 
						|
{- Constructs a relative path from a directory to a file. -}
 | 
						|
relPathDirToFile :: FilePath -> FilePath -> IO FilePath
 | 
						|
relPathDirToFile from to = relPathDirToFileAbs <$> absPath from <*> absPath to
 | 
						|
 | 
						|
{- This requires the first path to be absolute, and the
 | 
						|
 - second path cannot contain ../ or ./
 | 
						|
 -
 | 
						|
 - On Windows, if the paths are on different drives,
 | 
						|
 - a relative path is not possible and the path is simply
 | 
						|
 - returned as-is.
 | 
						|
 -}
 | 
						|
relPathDirToFileAbs :: FilePath -> FilePath -> FilePath
 | 
						|
relPathDirToFileAbs from to
 | 
						|
#ifdef mingw32_HOST_OS
 | 
						|
	| normdrive from /= normdrive to = to
 | 
						|
#endif
 | 
						|
	| otherwise = joinPath $ dotdots ++ uncommon
 | 
						|
  where
 | 
						|
	pfrom = sp from
 | 
						|
	pto = sp to
 | 
						|
	sp = map dropTrailingPathSeparator . splitPath . dropDrive
 | 
						|
	common = map fst $ takeWhile same $ zip pfrom pto
 | 
						|
	same (c,d) = c == d
 | 
						|
	uncommon = drop numcommon pto
 | 
						|
	dotdots = replicate (length pfrom - numcommon) ".."
 | 
						|
	numcommon = length common
 | 
						|
#ifdef mingw32_HOST_OS
 | 
						|
	normdrive = map toLower . takeWhile (/= ':') . takeDrive
 | 
						|
#endif
 | 
						|
 | 
						|
prop_relPathDirToFile_basics :: FilePath -> FilePath -> Bool
 | 
						|
prop_relPathDirToFile_basics from to
 | 
						|
	| null from || null to = True
 | 
						|
	| from == to = null r
 | 
						|
	| otherwise = not (null r)
 | 
						|
  where
 | 
						|
	r = relPathDirToFileAbs from to 
 | 
						|
 | 
						|
prop_relPathDirToFile_regressionTest :: Bool
 | 
						|
prop_relPathDirToFile_regressionTest = same_dir_shortcurcuits_at_difference
 | 
						|
  where
 | 
						|
	{- Two paths have the same directory component at the same
 | 
						|
	 - location, but it's not really the same directory.
 | 
						|
	 - Code used to get this wrong. -}
 | 
						|
	same_dir_shortcurcuits_at_difference =
 | 
						|
		relPathDirToFileAbs (joinPath [pathSeparator : "tmp", "r", "lll", "xxx", "yyy", "18"])
 | 
						|
			(joinPath [pathSeparator : "tmp", "r", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"])
 | 
						|
				== joinPath ["..", "..", "..", "..", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"]
 | 
						|
 | 
						|
{- Given an original list of paths, and an expanded list derived from it,
 | 
						|
 - which may be arbitrarily reordered, generates a list of lists, where
 | 
						|
 - each sublist corresponds to one of the original paths.
 | 
						|
 -
 | 
						|
 - When the original path is a directory, any items in the expanded list
 | 
						|
 - that are contained in that directory will appear in its segment.
 | 
						|
 -
 | 
						|
 - The order of the original list of paths is attempted to be preserved in
 | 
						|
 - the order of the returned segments. However, doing so has a O^NM
 | 
						|
 - growth factor. So, if the original list has more than 100 paths on it,
 | 
						|
 - we stop preserving ordering at that point. Presumably a user passing
 | 
						|
 - that many paths in doesn't care too much about order of the later ones.
 | 
						|
 -}
 | 
						|
segmentPaths :: [FilePath] -> [FilePath] -> [[FilePath]]
 | 
						|
segmentPaths [] new = [new]
 | 
						|
segmentPaths [_] new = [new] -- optimisation
 | 
						|
segmentPaths (l:ls) new = found : segmentPaths ls rest
 | 
						|
  where
 | 
						|
	(found, rest) = if length ls < 100
 | 
						|
		then partition (l `dirContains`) new
 | 
						|
		else break (\p -> not (l `dirContains` p)) new
 | 
						|
 | 
						|
{- This assumes that it's cheaper to call segmentPaths on the result,
 | 
						|
 - than it would be to run the action separately with each path. In
 | 
						|
 - the case of git file list commands, that assumption tends to hold.
 | 
						|
 -}
 | 
						|
runSegmentPaths :: ([FilePath] -> IO [FilePath]) -> [FilePath] -> IO [[FilePath]]
 | 
						|
runSegmentPaths a paths = segmentPaths paths <$> a paths
 | 
						|
 | 
						|
{- Converts paths in the home directory to use ~/ -}
 | 
						|
relHome :: FilePath -> IO String
 | 
						|
relHome path = do
 | 
						|
	home <- myHomeDir
 | 
						|
	return $ if dirContains home path
 | 
						|
		then "~/" ++ relPathDirToFileAbs home path
 | 
						|
		else path
 | 
						|
 | 
						|
{- Checks if a command is available in PATH.
 | 
						|
 -
 | 
						|
 - The command may be fully-qualified, in which case, this succeeds as
 | 
						|
 - long as it exists. -}
 | 
						|
inPath :: String -> IO Bool
 | 
						|
inPath command = isJust <$> searchPath command
 | 
						|
 | 
						|
{- Finds a command in PATH and returns the full path to it.
 | 
						|
 -
 | 
						|
 - The command may be fully qualified already, in which case it will
 | 
						|
 - be returned if it exists.
 | 
						|
 -
 | 
						|
 - Note that this will find commands in PATH that are not executable.
 | 
						|
 -}
 | 
						|
searchPath :: String -> IO (Maybe FilePath)
 | 
						|
searchPath command
 | 
						|
	| isAbsolute command = check command
 | 
						|
	| otherwise = getSearchPath >>= getM indir
 | 
						|
  where
 | 
						|
	indir d = check $ d </> command
 | 
						|
	check f = firstM doesFileExist
 | 
						|
#ifdef mingw32_HOST_OS
 | 
						|
		[f, f ++ ".exe"]
 | 
						|
#else
 | 
						|
		[f]
 | 
						|
#endif
 | 
						|
 | 
						|
{- Checks if a filename is a unix dotfile. All files inside dotdirs
 | 
						|
 - count as dotfiles. -}
 | 
						|
dotfile :: FilePath -> Bool
 | 
						|
dotfile file
 | 
						|
	| f == "." = False
 | 
						|
	| f == ".." = False
 | 
						|
	| f == "" = False
 | 
						|
	| otherwise = "." `isPrefixOf` f || dotfile (takeDirectory file)
 | 
						|
  where
 | 
						|
	f = takeFileName file
 | 
						|
 | 
						|
{- Converts a DOS style path to a msys2 style path. Only on Windows.
 | 
						|
 - Any trailing '\' is preserved as a trailing '/' 
 | 
						|
 - 
 | 
						|
 - Taken from: http://sourceforge.net/p/msys2/wiki/MSYS2%20introduction/i
 | 
						|
 -
 | 
						|
 - The virtual filesystem contains:
 | 
						|
 -  /c, /d, ...	mount points for Windows drives
 | 
						|
 -}
 | 
						|
toMSYS2Path :: FilePath -> FilePath
 | 
						|
#ifndef mingw32_HOST_OS
 | 
						|
toMSYS2Path = id
 | 
						|
#else
 | 
						|
toMSYS2Path p
 | 
						|
	| null drive = recombine parts
 | 
						|
	| otherwise = recombine $ "/" : driveletter drive : parts
 | 
						|
  where
 | 
						|
	(drive, p') = splitDrive p
 | 
						|
	parts = splitDirectories p'
 | 
						|
	driveletter = map toLower . takeWhile (/= ':')
 | 
						|
	recombine = fixtrailing . Posix.joinPath
 | 
						|
	fixtrailing s
 | 
						|
		| hasTrailingPathSeparator p = Posix.addTrailingPathSeparator s
 | 
						|
		| otherwise = s
 | 
						|
#endif
 | 
						|
 | 
						|
{- Maximum size to use for a file in a specified directory.
 | 
						|
 -
 | 
						|
 - Many systems have a 255 byte limit to the name of a file, 
 | 
						|
 - so that's taken as the max if the system has a larger limit, or has no
 | 
						|
 - limit.
 | 
						|
 -}
 | 
						|
fileNameLengthLimit :: FilePath -> IO Int
 | 
						|
#ifdef mingw32_HOST_OS
 | 
						|
fileNameLengthLimit _ = return 255
 | 
						|
#else
 | 
						|
fileNameLengthLimit dir = do
 | 
						|
	-- getPathVar can fail due to statfs(2) overflow
 | 
						|
	l <- catchDefaultIO 0 $
 | 
						|
		fromIntegral <$> getPathVar dir FileNameLimit
 | 
						|
	if l <= 0
 | 
						|
		then return 255
 | 
						|
		else return $ minimum [l, 255]
 | 
						|
#endif
 | 
						|
 | 
						|
{- Given a string that we'd like to use as the basis for FilePath, but that
 | 
						|
 - was provided by a third party and is not to be trusted, returns the closest
 | 
						|
 - sane FilePath.
 | 
						|
 -
 | 
						|
 - All spaces and punctuation and other wacky stuff are replaced
 | 
						|
 - with '_', except for '.'
 | 
						|
 - "../" will thus turn into ".._", which is safe.
 | 
						|
 -}
 | 
						|
sanitizeFilePath :: String -> FilePath
 | 
						|
sanitizeFilePath = map sanitize
 | 
						|
  where
 | 
						|
	sanitize c
 | 
						|
		| c == '.' = c
 | 
						|
		| isSpace c || isPunctuation c || isSymbol c || isControl c || c == '/' = '_'
 | 
						|
		| otherwise = c
 | 
						|
 | 
						|
{- Similar to splitExtensions, but knows that some things in FilePaths
 | 
						|
 - after a dot are too long to be extensions. -}
 | 
						|
splitShortExtensions :: FilePath -> (FilePath, [String])
 | 
						|
splitShortExtensions = splitShortExtensions' 5 -- enough for ".jpeg"
 | 
						|
splitShortExtensions' :: Int -> FilePath -> (FilePath, [String])
 | 
						|
splitShortExtensions' maxextension = go []
 | 
						|
  where
 | 
						|
	go c f
 | 
						|
		| len > 0 && len <= maxextension && not (null base) = 
 | 
						|
			go (ext:c) base
 | 
						|
		| otherwise = (f, c)
 | 
						|
	  where
 | 
						|
		(base, ext) = splitExtension f
 | 
						|
		len = length ext
 |