git-annex/Utility/Path.hs
Joey Hess b2c48fb86b
Fix using lookupkey inside a subdirectory
Caused by dirContains ".." "foo" being incorrectly False.

Also added a test of dirContains, which includes all the previous bug fixes
I could find and some obvious cases.

Reversion in version 8.20211011

Sponsored-by: Brett Eisenberg on Patreon
2021-10-26 15:00:45 -04:00

274 lines
8.8 KiB
Haskell

{- path manipulation
-
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fno-warn-tabs #-}
module Utility.Path (
simplifyPath,
parentDir,
upFrom,
dirContains,
segmentPaths,
segmentPaths',
runSegmentPaths,
runSegmentPaths',
dotfile,
splitShortExtensions,
relPathDirToFileAbs,
inSearchPath,
searchPath,
searchPathContents,
) where
import System.FilePath.ByteString
import qualified System.FilePath as P
import qualified Data.ByteString as B
import Data.List
import Data.Maybe
import Control.Monad
import Control.Applicative
import Prelude
import Utility.Monad
import Utility.SystemDirectory
import Utility.Exception
#ifdef mingw32_HOST_OS
import Data.Char
import Utility.FileSystemEncoding
#endif
{- Simplifies a path, removing any "." component, collapsing "dir/..",
- and removing the trailing path separator.
-
- On Windows, preserves whichever style of path separator might be used in
- the input RawFilePaths. This is done because some programs in Windows
- demand a particular path separator -- and which one actually varies!
-
- This does not guarantee that two paths that refer to the same location,
- and are both relative to the same location (or both absolute) will
- yeild the same result. Run both through normalise from System.RawFilePath
- to ensure that.
-}
simplifyPath :: RawFilePath -> RawFilePath
simplifyPath path = dropTrailingPathSeparator $
joinDrive drive $ joinPath $ norm [] $ splitPath path'
where
(drive, path') = splitDrive path
norm c [] = reverse c
norm c (p:ps)
| p' == ".." && not (null c) && dropTrailingPathSeparator (c !! 0) /= ".." =
norm (drop 1 c) ps
| p' == "." = norm c ps
| otherwise = norm (p:c) ps
where
p' = dropTrailingPathSeparator p
{- takeDirectory "foo/bar/" is "foo/bar". This instead yields "foo" -}
parentDir :: RawFilePath -> RawFilePath
parentDir = takeDirectory . dropTrailingPathSeparator
{- Just the parent directory of a path, or Nothing if the path has no
- parent (ie for "/" or "." or "foo") -}
upFrom :: RawFilePath -> Maybe RawFilePath
upFrom dir
| length dirs < 2 = Nothing
| otherwise = Just $ joinDrive drive $
B.intercalate (B.singleton pathSeparator) $ init dirs
where
-- on Unix, the drive will be "/" when the dir is absolute,
-- otherwise ""
(drive, path) = splitDrive dir
dirs = filter (not . B.null) $ B.splitWith isPathSeparator path
{- Checks if the first RawFilePath is, or could be said to contain the second.
- For example, "foo/" contains "foo/bar". Also, "foo", "./foo", "foo/" etc
- are all equivilant.
-}
dirContains :: RawFilePath -> RawFilePath -> Bool
dirContains a b = a == b
|| a' == b'
|| (a'' `B.isPrefixOf` b' && avoiddotdotb)
|| a' == "." && normalise ("." </> b') == b' && nodotdot b'
|| dotdotcontains
where
a' = norm a
a'' = addTrailingPathSeparator a'
b' = norm b
norm = normalise . simplifyPath
{- This handles the case where a is ".." and b is "../..",
- which is not inside a. Similarly, "../.." does not contain
- "../../../". Due to the use of norm, cases like
- "../../foo/../../" get converted to eg "../../.." and
- so do not need to be handled specially here.
-
- When this is called, we already know that
- a'' is a prefix of b', so all that needs to be done is drop
- that prefix, and check if the next path component is ".."
-}
avoiddotdotb = nodotdot $ B.drop (B.length a'') b'
nodotdot p = all (not . isdotdot) (splitPath p)
isdotdot s = dropTrailingPathSeparator s == ".."
{- This handles the case where a is ".." or "../.." etc,
- and b is "foo" or "../foo" etc. The rule is that when
- a is entirely ".." components, b is under it when it starts
- with fewer ".." components.
-
- Due to the use of norm, cases like "../../foo/../../" get
- converted to eg "../../../" and so do not need to be handled
- specially here.
-}
dotdotcontains
| isAbsolute b' = False
| otherwise =
let aps = splitPath a'
bps = splitPath b'
in if all isdotdot aps
then length (takeWhile isdotdot bps) < length aps
else False
{- Given an original list of paths, and an expanded list derived from it,
- which may be arbitrarily reordered, generates a list of lists, where
- each sublist corresponds to one of the original paths.
-
- When the original path is a directory, any items in the expanded list
- that are contained in that directory will appear in its segment.
-
- The order of the original list of paths is attempted to be preserved in
- the order of the returned segments. However, doing so has a O^NM
- growth factor. So, if the original list has more than 100 paths on it,
- we stop preserving ordering at that point. Presumably a user passing
- that many paths in doesn't care too much about order of the later ones.
-}
segmentPaths :: (a -> RawFilePath) -> [RawFilePath] -> [a] -> [[a]]
segmentPaths = segmentPaths' (\_ r -> r)
segmentPaths' :: (Maybe RawFilePath -> a -> r) -> (a -> RawFilePath) -> [RawFilePath] -> [a] -> [[r]]
segmentPaths' f _ [] new = [map (f Nothing) new]
segmentPaths' f _ [i] new = [map (f (Just i)) new] -- optimisation
segmentPaths' f c (i:is) new =
map (f (Just i)) found : segmentPaths' f c is rest
where
(found, rest) = if length is < 100
then partition ini new
else break (not . ini) new
ini p = i `dirContains` c p
{- This assumes that it's cheaper to call segmentPaths on the result,
- than it would be to run the action separately with each path. In
- the case of git file list commands, that assumption tends to hold.
-}
runSegmentPaths :: (a -> RawFilePath) -> ([RawFilePath] -> IO [a]) -> [RawFilePath] -> IO [[a]]
runSegmentPaths c a paths = segmentPaths c paths <$> a paths
runSegmentPaths' :: (Maybe RawFilePath -> a -> r) -> (a -> RawFilePath) -> ([RawFilePath] -> IO [a]) -> [RawFilePath] -> IO [[r]]
runSegmentPaths' si c a paths = segmentPaths' si c paths <$> a paths
{- Checks if a filename is a unix dotfile. All files inside dotdirs
- count as dotfiles. -}
dotfile :: RawFilePath -> Bool
dotfile file
| f == "." = False
| f == ".." = False
| f == "" = False
| otherwise = "." `B.isPrefixOf` f || dotfile (takeDirectory file)
where
f = takeFileName file
{- Similar to splitExtensions, but knows that some things in RawFilePaths
- after a dot are too long to be extensions. -}
splitShortExtensions :: RawFilePath -> (RawFilePath, [B.ByteString])
splitShortExtensions = splitShortExtensions' 5 -- enough for ".jpeg"
splitShortExtensions' :: Int -> RawFilePath -> (RawFilePath, [B.ByteString])
splitShortExtensions' maxextension = go []
where
go c f
| len > 0 && len <= maxextension && not (B.null base) =
go (ext:c) base
| otherwise = (f, c)
where
(base, ext) = splitExtension f
len = B.length ext
{- This requires both paths to be absolute and normalized.
-
- On Windows, if the paths are on different drives,
- a relative path is not possible and the path is simply
- returned as-is.
-}
relPathDirToFileAbs :: RawFilePath -> RawFilePath -> RawFilePath
relPathDirToFileAbs from to
#ifdef mingw32_HOST_OS
| normdrive from /= normdrive to = to
#endif
| otherwise = joinPath $ dotdots ++ uncommon
where
pfrom = sp from
pto = sp to
sp = map dropTrailingPathSeparator . splitPath . dropDrive
common = map fst $ takeWhile same $ zip pfrom pto
same (c,d) = c == d
uncommon = drop numcommon pto
dotdots = replicate (length pfrom - numcommon) ".."
numcommon = length common
#ifdef mingw32_HOST_OS
normdrive = map toLower
-- Get just the drive letter, removing any leading
-- path separator, which takeDrive leaves on the drive
-- letter.
. dropWhileEnd (isPathSeparator . fromIntegral . ord)
. fromRawFilePath
. takeDrive
#endif
{- Checks if a command is available in PATH.
-
- The command may be fully-qualified, in which case, this succeeds as
- long as it exists. -}
inSearchPath :: String -> IO Bool
inSearchPath command = isJust <$> searchPath command
{- Finds a command in PATH and returns the full path to it.
-
- The command may be fully qualified already, in which case it will
- be returned if it exists.
-
- Note that this will find commands in PATH that are not executable.
-}
searchPath :: String -> IO (Maybe FilePath)
searchPath command
| P.isAbsolute command = check command
| otherwise = P.getSearchPath >>= getM indir
where
indir d = check $ d P.</> command
check f = firstM doesFileExist
#ifdef mingw32_HOST_OS
[f, f ++ ".exe"]
#else
[f]
#endif
{- Finds commands in PATH that match a predicate. Note that the predicate
- matches on the basename of the command, but the full path to it is
- returned.
-
- Note that this will find commands in PATH that are not executable.
-}
searchPathContents :: (FilePath -> Bool) -> IO [FilePath]
searchPathContents p =
filterM doesFileExist
=<< (concat <$> (P.getSearchPath >>= mapM go))
where
go d = map (d P.</>) . filter p
<$> catchDefaultIO [] (getDirectoryContents d)