2011-08-22 20:14:12 +00:00
|
|
|
{- path manipulation
|
|
|
|
-
|
addurl --preserve-filename and a few related changes
* addurl --preserve-filename: New option, uses server-provided filename
without any sanitization, but with some security checking.
Not yet implemented for remotes other than the web.
* addurl, importfeed: Avoid adding filenames with leading '.', instead
it will be replaced with '_'.
This might be considered a security fix, but a CVE seems unwattanted.
It was possible for addurl to create a dotfile, which could change
behavior of some program. It was also possible for a web server to say
the file name was ".git" or "foo/.git". That would not overrwrite the
.git directory, but would cause addurl to fail; of course git won't
add "foo/.git".
sanitizeFilePath is too opinionated to remain in Utility, so moved it.
The changes to mkSafeFilePath are because it used sanitizeFilePath.
In particular:
isDrive will never succeed, because "c:" gets munged to "c_"
".." gets sanitized now
".git" gets sanitized now
It will never be null, because sanitizeFilePath keeps the length
the same, and splitDirectories never returns a null path.
Also, on the off chance a web server suggests a filename of "",
ignore that, rather than trying to save to such a filename, which would
fail in some way.
2020-05-08 20:09:29 +00:00
|
|
|
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
|
2011-08-22 20:14:12 +00:00
|
|
|
-
|
2014-05-10 14:01:27 +00:00
|
|
|
- License: BSD-2-clause
|
2011-08-22 20:14:12 +00:00
|
|
|
-}
|
2013-04-14 16:51:05 +00:00
|
|
|
|
2017-12-31 20:08:31 +00:00
|
|
|
{-# LANGUAGE CPP #-}
|
2015-05-10 20:31:50 +00:00
|
|
|
{-# OPTIONS_GHC -fno-warn-tabs #-}
|
2011-08-22 20:14:12 +00:00
|
|
|
|
2019-11-21 19:38:06 +00:00
|
|
|
module Utility.Path (
|
|
|
|
simplifyPath,
|
|
|
|
absPathFrom,
|
|
|
|
parentDir,
|
|
|
|
upFrom,
|
|
|
|
dirContains,
|
|
|
|
absPath,
|
|
|
|
relPathCwdToFile,
|
|
|
|
relPathDirToFile,
|
|
|
|
relPathDirToFileAbs,
|
|
|
|
segmentPaths,
|
2020-09-14 20:49:33 +00:00
|
|
|
segmentPaths',
|
2019-11-21 19:38:06 +00:00
|
|
|
runSegmentPaths,
|
2020-09-14 20:49:33 +00:00
|
|
|
runSegmentPaths',
|
2019-11-21 19:38:06 +00:00
|
|
|
relHome,
|
|
|
|
inPath,
|
|
|
|
searchPath,
|
|
|
|
dotfile,
|
|
|
|
splitShortExtensions,
|
|
|
|
|
|
|
|
prop_upFrom_basics,
|
|
|
|
prop_relPathDirToFile_basics,
|
|
|
|
prop_relPathDirToFile_regressionTest,
|
|
|
|
) where
|
2011-08-22 20:14:12 +00:00
|
|
|
|
|
|
|
import System.FilePath
|
|
|
|
import Data.List
|
|
|
|
import Data.Maybe
|
2020-06-30 15:01:28 +00:00
|
|
|
#ifdef mingw32_HOST_OS
|
|
|
|
import Data.Char
|
|
|
|
#endif
|
2011-08-25 04:28:55 +00:00
|
|
|
import Control.Applicative
|
2015-05-10 20:19:56 +00:00
|
|
|
import Prelude
|
2011-10-16 04:31:25 +00:00
|
|
|
|
|
|
|
import Utility.Monad
|
2012-10-25 22:17:32 +00:00
|
|
|
import Utility.UserInfo
|
2020-03-05 17:52:30 +00:00
|
|
|
import Utility.SystemDirectory
|
2017-05-16 03:32:17 +00:00
|
|
|
import Utility.Split
|
2019-11-26 19:27:22 +00:00
|
|
|
import Utility.FileSystemEncoding
|
2011-08-22 20:14:12 +00:00
|
|
|
|
2015-08-04 18:58:21 +00:00
|
|
|
{- Simplifies a path, removing any "." component, collapsing "dir/..",
|
|
|
|
- and removing the trailing path separator.
|
2014-02-07 21:10:51 +00:00
|
|
|
-
|
|
|
|
- On Windows, preserves whichever style of path separator might be used in
|
|
|
|
- the input FilePaths. This is done because some programs in Windows
|
|
|
|
- demand a particular path separator -- and which one actually varies!
|
|
|
|
-
|
|
|
|
- This does not guarantee that two paths that refer to the same location,
|
|
|
|
- and are both relative to the same location (or both absolute) will
|
|
|
|
- yeild the same result. Run both through normalise from System.FilePath
|
|
|
|
- to ensure that.
|
|
|
|
-}
|
|
|
|
simplifyPath :: FilePath -> FilePath
|
|
|
|
simplifyPath path = dropTrailingPathSeparator $
|
|
|
|
joinDrive drive $ joinPath $ norm [] $ splitPath path'
|
|
|
|
where
|
|
|
|
(drive, path') = splitDrive path
|
|
|
|
|
|
|
|
norm c [] = reverse c
|
|
|
|
norm c (p:ps)
|
2015-08-04 18:58:21 +00:00
|
|
|
| p' == ".." && not (null c) && dropTrailingPathSeparator (c !! 0) /= ".." =
|
|
|
|
norm (drop 1 c) ps
|
2014-02-07 21:10:51 +00:00
|
|
|
| p' == "." = norm c ps
|
|
|
|
| otherwise = norm (p:c) ps
|
|
|
|
where
|
|
|
|
p' = dropTrailingPathSeparator p
|
|
|
|
|
|
|
|
{- Makes a path absolute.
|
2020-03-05 17:52:30 +00:00
|
|
|
-
|
|
|
|
- Also simplifies it using simplifyPath.
|
2014-02-07 21:10:51 +00:00
|
|
|
-
|
2013-05-12 21:36:44 +00:00
|
|
|
- The first parameter is a base directory (ie, the cwd) to use if the path
|
2016-01-05 21:33:48 +00:00
|
|
|
- is not already absolute, and should itsef be absolute.
|
2013-05-12 21:36:44 +00:00
|
|
|
-
|
2014-02-07 21:10:51 +00:00
|
|
|
- Does not attempt to deal with edge cases or ensure security with
|
|
|
|
- untrusted inputs.
|
2013-05-12 21:36:44 +00:00
|
|
|
-}
|
2014-02-07 21:10:51 +00:00
|
|
|
absPathFrom :: FilePath -> FilePath -> FilePath
|
|
|
|
absPathFrom dir path = simplifyPath (combine dir path)
|
2014-02-06 21:08:54 +00:00
|
|
|
|
2015-01-09 18:26:52 +00:00
|
|
|
{- takeDirectory "foo/bar/" is "foo/bar". This instead yields "foo" -}
|
2015-01-09 17:11:56 +00:00
|
|
|
parentDir :: FilePath -> FilePath
|
2015-01-09 18:26:52 +00:00
|
|
|
parentDir = takeDirectory . dropTrailingPathSeparator
|
|
|
|
|
|
|
|
{- Just the parent directory of a path, or Nothing if the path has no
|
2020-05-11 19:03:35 +00:00
|
|
|
- parent (ie for "/" or "." or "foo") -}
|
2015-01-09 18:26:52 +00:00
|
|
|
upFrom :: FilePath -> Maybe FilePath
|
|
|
|
upFrom dir
|
2015-01-20 21:23:23 +00:00
|
|
|
| length dirs < 2 = Nothing
|
2017-05-16 03:32:17 +00:00
|
|
|
| otherwise = Just $ joinDrive drive $ intercalate s $ init dirs
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
2017-05-16 03:32:17 +00:00
|
|
|
-- on Unix, the drive will be "/" when the dir is absolute,
|
|
|
|
-- otherwise ""
|
2013-05-12 18:58:46 +00:00
|
|
|
(drive, path) = splitDrive dir
|
2012-12-13 04:24:19 +00:00
|
|
|
s = [pathSeparator]
|
2017-05-16 03:32:17 +00:00
|
|
|
dirs = filter (not . null) $ split s path
|
2011-08-22 20:14:12 +00:00
|
|
|
|
2015-01-09 18:26:52 +00:00
|
|
|
prop_upFrom_basics :: FilePath -> Bool
|
|
|
|
prop_upFrom_basics dir
|
2011-08-22 20:14:12 +00:00
|
|
|
| null dir = True
|
2015-01-09 18:26:52 +00:00
|
|
|
| dir == "/" = p == Nothing
|
|
|
|
| otherwise = p /= Just dir
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
2015-01-09 18:26:52 +00:00
|
|
|
p = upFrom dir
|
2011-08-22 20:14:12 +00:00
|
|
|
|
|
|
|
{- Checks if the first FilePath is, or could be said to contain the second.
|
|
|
|
- For example, "foo/" contains "foo/bar". Also, "foo", "./foo", "foo/" etc
|
|
|
|
- are all equivilant.
|
|
|
|
-}
|
|
|
|
dirContains :: FilePath -> FilePath -> Bool
|
2019-12-20 22:01:29 +00:00
|
|
|
dirContains a b = a == b
|
|
|
|
|| a' == b'
|
|
|
|
|| (addTrailingPathSeparator a') `isPrefixOf` b'
|
|
|
|
|| a' == "." && normalise ("." </> b') == b'
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
|
|
|
a' = norm a
|
|
|
|
b' = norm b
|
2014-02-07 21:10:51 +00:00
|
|
|
norm = normalise . simplifyPath
|
2011-08-22 20:14:12 +00:00
|
|
|
|
2014-02-07 21:10:51 +00:00
|
|
|
{- Converts a filename into an absolute path.
|
2020-03-05 17:52:30 +00:00
|
|
|
-
|
|
|
|
- Also simplifies it using simplifyPath.
|
2012-01-23 20:57:45 +00:00
|
|
|
-
|
|
|
|
- Unlike Directory.canonicalizePath, this does not require the path
|
|
|
|
- already exists. -}
|
2011-08-22 20:14:12 +00:00
|
|
|
absPath :: FilePath -> IO FilePath
|
2020-03-05 17:52:30 +00:00
|
|
|
absPath file
|
|
|
|
-- Avoid unncessarily getting the current directory when the path
|
|
|
|
-- is already absolute. absPathFrom uses simplifyPath
|
|
|
|
-- so also used here for consistency.
|
|
|
|
| isAbsolute file = return $ simplifyPath file
|
|
|
|
| otherwise = do
|
|
|
|
cwd <- getCurrentDirectory
|
|
|
|
return $ absPathFrom cwd file
|
2011-08-22 20:14:12 +00:00
|
|
|
|
|
|
|
{- Constructs a relative path from the CWD to a file.
|
|
|
|
-
|
|
|
|
- For example, assuming CWD is /tmp/foo/bar:
|
|
|
|
- relPathCwdToFile "/tmp/foo" == ".."
|
|
|
|
- relPathCwdToFile "/tmp/foo/bar" == ""
|
|
|
|
-}
|
|
|
|
relPathCwdToFile :: FilePath -> IO FilePath
|
2015-01-06 19:31:24 +00:00
|
|
|
relPathCwdToFile f = do
|
|
|
|
c <- getCurrentDirectory
|
|
|
|
relPathDirToFile c f
|
2011-08-22 20:14:12 +00:00
|
|
|
|
2015-01-06 19:31:24 +00:00
|
|
|
{- Constructs a relative path from a directory to a file. -}
|
|
|
|
relPathDirToFile :: FilePath -> FilePath -> IO FilePath
|
|
|
|
relPathDirToFile from to = relPathDirToFileAbs <$> absPath from <*> absPath to
|
|
|
|
|
|
|
|
{- This requires the first path to be absolute, and the
|
|
|
|
- second path cannot contain ../ or ./
|
2015-04-14 18:07:55 +00:00
|
|
|
-
|
|
|
|
- On Windows, if the paths are on different drives,
|
|
|
|
- a relative path is not possible and the path is simply
|
|
|
|
- returned as-is.
|
2011-08-22 20:14:12 +00:00
|
|
|
-}
|
2015-01-06 19:31:24 +00:00
|
|
|
relPathDirToFileAbs :: FilePath -> FilePath -> FilePath
|
2015-04-14 18:07:55 +00:00
|
|
|
relPathDirToFileAbs from to
|
2017-10-25 23:04:26 +00:00
|
|
|
#ifdef mingw32_HOST_OS
|
|
|
|
| normdrive from /= normdrive to = to
|
|
|
|
#endif
|
2017-05-15 21:13:08 +00:00
|
|
|
| otherwise = joinPath $ dotdots ++ uncommon
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
2017-05-15 21:13:08 +00:00
|
|
|
pfrom = sp from
|
|
|
|
pto = sp to
|
2017-10-26 16:01:16 +00:00
|
|
|
sp = map dropTrailingPathSeparator . splitPath . dropDrive
|
2012-12-13 04:24:19 +00:00
|
|
|
common = map fst $ takeWhile same $ zip pfrom pto
|
2017-05-16 03:32:17 +00:00
|
|
|
same (c,d) = c == d
|
2012-12-13 04:24:19 +00:00
|
|
|
uncommon = drop numcommon pto
|
|
|
|
dotdots = replicate (length pfrom - numcommon) ".."
|
|
|
|
numcommon = length common
|
2017-10-25 23:04:26 +00:00
|
|
|
#ifdef mingw32_HOST_OS
|
|
|
|
normdrive = map toLower . takeWhile (/= ':') . takeDrive
|
|
|
|
#endif
|
2011-08-22 20:14:12 +00:00
|
|
|
|
|
|
|
prop_relPathDirToFile_basics :: FilePath -> FilePath -> Bool
|
|
|
|
prop_relPathDirToFile_basics from to
|
2015-04-14 19:15:29 +00:00
|
|
|
| null from || null to = True
|
2011-08-22 20:14:12 +00:00
|
|
|
| from == to = null r
|
|
|
|
| otherwise = not (null r)
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
2015-01-06 19:31:24 +00:00
|
|
|
r = relPathDirToFileAbs from to
|
2011-09-19 05:37:04 +00:00
|
|
|
|
2012-03-05 16:42:52 +00:00
|
|
|
prop_relPathDirToFile_regressionTest :: Bool
|
|
|
|
prop_relPathDirToFile_regressionTest = same_dir_shortcurcuits_at_difference
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
|
|
|
{- Two paths have the same directory component at the same
|
|
|
|
- location, but it's not really the same directory.
|
|
|
|
- Code used to get this wrong. -}
|
|
|
|
same_dir_shortcurcuits_at_difference =
|
2015-01-06 19:31:24 +00:00
|
|
|
relPathDirToFileAbs (joinPath [pathSeparator : "tmp", "r", "lll", "xxx", "yyy", "18"])
|
2013-06-18 17:08:28 +00:00
|
|
|
(joinPath [pathSeparator : "tmp", "r", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"])
|
|
|
|
== joinPath ["..", "..", "..", "..", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"]
|
2012-03-05 16:42:52 +00:00
|
|
|
|
2012-11-25 21:54:08 +00:00
|
|
|
{- Given an original list of paths, and an expanded list derived from it,
|
2015-04-02 05:44:32 +00:00
|
|
|
- which may be arbitrarily reordered, generates a list of lists, where
|
|
|
|
- each sublist corresponds to one of the original paths.
|
|
|
|
-
|
|
|
|
- When the original path is a directory, any items in the expanded list
|
|
|
|
- that are contained in that directory will appear in its segment.
|
|
|
|
-
|
|
|
|
- The order of the original list of paths is attempted to be preserved in
|
|
|
|
- the order of the returned segments. However, doing so has a O^NM
|
|
|
|
- growth factor. So, if the original list has more than 100 paths on it,
|
|
|
|
- we stop preserving ordering at that point. Presumably a user passing
|
|
|
|
- that many paths in doesn't care too much about order of the later ones.
|
2011-09-19 05:37:04 +00:00
|
|
|
-}
|
2020-07-10 17:54:52 +00:00
|
|
|
segmentPaths :: (a -> RawFilePath) -> [RawFilePath] -> [a] -> [[a]]
|
2020-09-14 20:49:33 +00:00
|
|
|
segmentPaths = segmentPaths' (\_ r -> r)
|
|
|
|
|
|
|
|
segmentPaths' :: (Maybe RawFilePath -> a -> r) -> (a -> RawFilePath) -> [RawFilePath] -> [a] -> [[r]]
|
|
|
|
segmentPaths' f _ [] new = [map (f Nothing) new]
|
|
|
|
segmentPaths' f _ [i] new = [map (f (Just i)) new] -- optimisation
|
|
|
|
segmentPaths' f c (i:is) new =
|
|
|
|
map (f (Just i)) found : segmentPaths' f c is rest
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
2020-09-14 20:49:33 +00:00
|
|
|
(found, rest) = if length is < 100
|
|
|
|
then partition ini new
|
|
|
|
else break (not . ini) new
|
2020-09-25 15:07:41 +00:00
|
|
|
ini p = i' `dirContains` fromRawFilePath (c p)
|
2020-09-14 20:49:33 +00:00
|
|
|
i' = fromRawFilePath i
|
2011-09-19 05:37:04 +00:00
|
|
|
|
2012-11-25 21:54:08 +00:00
|
|
|
{- This assumes that it's cheaper to call segmentPaths on the result,
|
|
|
|
- than it would be to run the action separately with each path. In
|
|
|
|
- the case of git file list commands, that assumption tends to hold.
|
2011-09-19 05:37:04 +00:00
|
|
|
-}
|
2020-09-25 15:38:42 +00:00
|
|
|
runSegmentPaths :: (a -> RawFilePath) -> ([RawFilePath] -> IO ([a], v)) -> [RawFilePath] -> IO ([[a]], v)
|
|
|
|
runSegmentPaths c a paths = do
|
|
|
|
(l, cleanup) <- a paths
|
|
|
|
return (segmentPaths c paths l, cleanup)
|
2011-10-16 04:31:25 +00:00
|
|
|
|
2020-09-25 15:38:42 +00:00
|
|
|
runSegmentPaths' :: (Maybe RawFilePath -> a -> r) -> (a -> RawFilePath) -> ([RawFilePath] -> IO ([a], v)) -> [RawFilePath] -> IO ([[r]], v)
|
|
|
|
runSegmentPaths' si c a paths = do
|
|
|
|
(l, cleanup) <- a paths
|
|
|
|
return (segmentPaths' si c paths l, cleanup)
|
2020-09-14 20:49:33 +00:00
|
|
|
|
2012-08-02 11:47:50 +00:00
|
|
|
{- Converts paths in the home directory to use ~/ -}
|
|
|
|
relHome :: FilePath -> IO String
|
|
|
|
relHome path = do
|
|
|
|
home <- myHomeDir
|
|
|
|
return $ if dirContains home path
|
2015-01-06 19:31:24 +00:00
|
|
|
then "~/" ++ relPathDirToFileAbs home path
|
2012-08-02 11:47:50 +00:00
|
|
|
else path
|
|
|
|
|
2012-12-14 19:52:44 +00:00
|
|
|
{- Checks if a command is available in PATH.
|
|
|
|
-
|
|
|
|
- The command may be fully-qualified, in which case, this succeeds as
|
|
|
|
- long as it exists. -}
|
2011-10-16 04:31:25 +00:00
|
|
|
inPath :: String -> IO Bool
|
2012-12-14 19:52:44 +00:00
|
|
|
inPath command = isJust <$> searchPath command
|
|
|
|
|
|
|
|
{- Finds a command in PATH and returns the full path to it.
|
|
|
|
-
|
|
|
|
- The command may be fully qualified already, in which case it will
|
|
|
|
- be returned if it exists.
|
2017-03-08 19:56:55 +00:00
|
|
|
-
|
|
|
|
- Note that this will find commands in PATH that are not executable.
|
2012-12-14 19:52:44 +00:00
|
|
|
-}
|
|
|
|
searchPath :: String -> IO (Maybe FilePath)
|
|
|
|
searchPath command
|
|
|
|
| isAbsolute command = check command
|
|
|
|
| otherwise = getSearchPath >>= getM indir
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
2012-12-14 19:52:44 +00:00
|
|
|
indir d = check $ d </> command
|
2013-07-06 04:48:47 +00:00
|
|
|
check f = firstM doesFileExist
|
2013-08-02 16:27:32 +00:00
|
|
|
#ifdef mingw32_HOST_OS
|
2013-07-06 04:48:47 +00:00
|
|
|
[f, f ++ ".exe"]
|
|
|
|
#else
|
|
|
|
[f]
|
|
|
|
#endif
|
2012-01-03 04:09:09 +00:00
|
|
|
|
|
|
|
{- Checks if a filename is a unix dotfile. All files inside dotdirs
|
|
|
|
- count as dotfiles. -}
|
|
|
|
dotfile :: FilePath -> Bool
|
|
|
|
dotfile file
|
|
|
|
| f == "." = False
|
|
|
|
| f == ".." = False
|
|
|
|
| f == "" = False
|
|
|
|
| otherwise = "." `isPrefixOf` f || dotfile (takeDirectory file)
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
|
|
|
f = takeFileName file
|
2013-05-14 17:24:15 +00:00
|
|
|
|
2014-02-16 21:39:54 +00:00
|
|
|
{- Similar to splitExtensions, but knows that some things in FilePaths
|
|
|
|
- after a dot are too long to be extensions. -}
|
|
|
|
splitShortExtensions :: FilePath -> (FilePath, [String])
|
|
|
|
splitShortExtensions = splitShortExtensions' 5 -- enough for ".jpeg"
|
|
|
|
splitShortExtensions' :: Int -> FilePath -> (FilePath, [String])
|
|
|
|
splitShortExtensions' maxextension = go []
|
|
|
|
where
|
|
|
|
go c f
|
|
|
|
| len > 0 && len <= maxextension && not (null base) =
|
|
|
|
go (ext:c) base
|
|
|
|
| otherwise = (f, c)
|
|
|
|
where
|
|
|
|
(base, ext) = splitExtension f
|
|
|
|
len = length ext
|