convert to RawByteString

This will break a lot of stuff that uses it, but once fixed should lead
to better performance.

Mostly mechanical.
Changes of note:

* upFrom now uses isPathSeparator, which is better on Windows where
  there is not just one
* splitShortExtensions used to take the length of a string,
  which would count wide unicode characters as a single character.
  Changing to B.length changes that. Note that, git-annex's
  annexMaxExtensionLength already changed to the length in bytes
  before this change. This function is only used in generating views,
  and the small behavior change should not be a problem.
* relHome still uses FilePath because it didn't seem worth changing(?)

This commit was sponsored by Jack Hill on Patreon.
This commit is contained in:
Joey Hess 2020-10-28 14:18:09 -04:00
parent 59dbd10f92
commit e219aadbab
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38

View file

@ -5,6 +5,7 @@
- License: BSD-2-clause - License: BSD-2-clause
-} -}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-} {-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fno-warn-tabs #-} {-# OPTIONS_GHC -fno-warn-tabs #-}
@ -33,11 +34,15 @@ module Utility.Path (
prop_relPathDirToFile_regressionTest, prop_relPathDirToFile_regressionTest,
) where ) where
import System.FilePath import System.FilePath.ByteString
import qualified System.FilePath as P
import qualified Data.ByteString as B
import Data.List import Data.List
import Data.Maybe import Data.Maybe
#ifdef mingw32_HOST_OS #ifdef mingw32_HOST_OS
import Data.Char import Data.Char
#else
import System.Posix.Directory.ByteString (getWorkingDirectory)
#endif #endif
import Control.Applicative import Control.Applicative
import Prelude import Prelude
@ -52,15 +57,15 @@ import Utility.FileSystemEncoding
- and removing the trailing path separator. - and removing the trailing path separator.
- -
- On Windows, preserves whichever style of path separator might be used in - On Windows, preserves whichever style of path separator might be used in
- the input FilePaths. This is done because some programs in Windows - the input RawFilePaths. This is done because some programs in Windows
- demand a particular path separator -- and which one actually varies! - demand a particular path separator -- and which one actually varies!
- -
- This does not guarantee that two paths that refer to the same location, - This does not guarantee that two paths that refer to the same location,
- and are both relative to the same location (or both absolute) will - and are both relative to the same location (or both absolute) will
- yeild the same result. Run both through normalise from System.FilePath - yeild the same result. Run both through normalise from System.RawFilePath
- to ensure that. - to ensure that.
-} -}
simplifyPath :: FilePath -> FilePath simplifyPath :: RawFilePath -> RawFilePath
simplifyPath path = dropTrailingPathSeparator $ simplifyPath path = dropTrailingPathSeparator $
joinDrive drive $ joinPath $ norm [] $ splitPath path' joinDrive drive $ joinPath $ norm [] $ splitPath path'
where where
@ -85,42 +90,42 @@ simplifyPath path = dropTrailingPathSeparator $
- Does not attempt to deal with edge cases or ensure security with - Does not attempt to deal with edge cases or ensure security with
- untrusted inputs. - untrusted inputs.
-} -}
absPathFrom :: FilePath -> FilePath -> FilePath absPathFrom :: RawFilePath -> RawFilePath -> RawFilePath
absPathFrom dir path = simplifyPath (combine dir path) absPathFrom dir path = simplifyPath (combine dir path)
{- takeDirectory "foo/bar/" is "foo/bar". This instead yields "foo" -} {- takeDirectory "foo/bar/" is "foo/bar". This instead yields "foo" -}
parentDir :: FilePath -> FilePath parentDir :: RawFilePath -> RawFilePath
parentDir = takeDirectory . dropTrailingPathSeparator parentDir = takeDirectory . dropTrailingPathSeparator
{- Just the parent directory of a path, or Nothing if the path has no {- Just the parent directory of a path, or Nothing if the path has no
- parent (ie for "/" or "." or "foo") -} - parent (ie for "/" or "." or "foo") -}
upFrom :: FilePath -> Maybe FilePath upFrom :: RawFilePath -> Maybe RawFilePath
upFrom dir upFrom dir
| length dirs < 2 = Nothing | length dirs < 2 = Nothing
| otherwise = Just $ joinDrive drive $ intercalate s $ init dirs | otherwise = Just $ joinDrive drive $
B.intercalate (B.singleton pathSeparator) $ init dirs
where where
-- on Unix, the drive will be "/" when the dir is absolute, -- on Unix, the drive will be "/" when the dir is absolute,
-- otherwise "" -- otherwise ""
(drive, path) = splitDrive dir (drive, path) = splitDrive dir
s = [pathSeparator] dirs = filter (not . B.null) $ B.splitWith isPathSeparator path
dirs = filter (not . null) $ split s path
prop_upFrom_basics :: FilePath -> Bool prop_upFrom_basics :: RawFilePath -> Bool
prop_upFrom_basics dir prop_upFrom_basics dir
| null dir = True | B.null dir = True
| dir == "/" = p == Nothing | dir == "/" = p == Nothing
| otherwise = p /= Just dir | otherwise = p /= Just dir
where where
p = upFrom dir p = upFrom dir
{- Checks if the first FilePath is, or could be said to contain the second. {- Checks if the first RawFilePath is, or could be said to contain the second.
- For example, "foo/" contains "foo/bar". Also, "foo", "./foo", "foo/" etc - For example, "foo/" contains "foo/bar". Also, "foo", "./foo", "foo/" etc
- are all equivilant. - are all equivilant.
-} -}
dirContains :: FilePath -> FilePath -> Bool dirContains :: RawFilePath -> RawFilePath -> Bool
dirContains a b = a == b dirContains a b = a == b
|| a' == b' || a' == b'
|| (addTrailingPathSeparator a') `isPrefixOf` b' || (addTrailingPathSeparator a') `B.isPrefixOf` b'
|| a' == "." && normalise ("." </> b') == b' || a' == "." && normalise ("." </> b') == b'
where where
a' = norm a a' = norm a
@ -133,14 +138,18 @@ dirContains a b = a == b
- -
- Unlike Directory.canonicalizePath, this does not require the path - Unlike Directory.canonicalizePath, this does not require the path
- already exists. -} - already exists. -}
absPath :: FilePath -> IO FilePath absPath :: RawFilePath -> IO RawFilePath
absPath file absPath file
-- Avoid unncessarily getting the current directory when the path -- Avoid unncessarily getting the current directory when the path
-- is already absolute. absPathFrom uses simplifyPath -- is already absolute. absPathFrom uses simplifyPath
-- so also used here for consistency. -- so also used here for consistency.
| isAbsolute file = return $ simplifyPath file | isAbsolute file = return $ simplifyPath file
| otherwise = do | otherwise = do
cwd <- getCurrentDirectory #ifdef mingw32_HOST_OS
cwd <- toRawFilePath <$> getCurrentDirectory
#else
cwd <- getWorkingDirectory
#endif
return $ absPathFrom cwd file return $ absPathFrom cwd file
{- Constructs a relative path from the CWD to a file. {- Constructs a relative path from the CWD to a file.
@ -149,13 +158,17 @@ absPath file
- relPathCwdToFile "/tmp/foo" == ".." - relPathCwdToFile "/tmp/foo" == ".."
- relPathCwdToFile "/tmp/foo/bar" == "" - relPathCwdToFile "/tmp/foo/bar" == ""
-} -}
relPathCwdToFile :: FilePath -> IO FilePath relPathCwdToFile :: RawFilePath -> IO RawFilePath
relPathCwdToFile f = do relPathCwdToFile f = do
c <- getCurrentDirectory #ifdef mingw32_HOST_OS
c <- toRawFilePath <$> getCurrentDirectory
#else
c <- getWorkingDirectory
#endif
relPathDirToFile c f relPathDirToFile c f
{- Constructs a relative path from a directory to a file. -} {- Constructs a relative path from a directory to a file. -}
relPathDirToFile :: FilePath -> FilePath -> IO FilePath relPathDirToFile :: RawFilePath -> RawFilePath -> IO RawFilePath
relPathDirToFile from to = relPathDirToFileAbs <$> absPath from <*> absPath to relPathDirToFile from to = relPathDirToFileAbs <$> absPath from <*> absPath to
{- This requires the first path to be absolute, and the {- This requires the first path to be absolute, and the
@ -165,7 +178,7 @@ relPathDirToFile from to = relPathDirToFileAbs <$> absPath from <*> absPath to
- a relative path is not possible and the path is simply - a relative path is not possible and the path is simply
- returned as-is. - returned as-is.
-} -}
relPathDirToFileAbs :: FilePath -> FilePath -> FilePath relPathDirToFileAbs :: RawFilePath -> RawFilePath -> RawFilePath
relPathDirToFileAbs from to relPathDirToFileAbs from to
#ifdef mingw32_HOST_OS #ifdef mingw32_HOST_OS
| normdrive from /= normdrive to = to | normdrive from /= normdrive to = to
@ -181,14 +194,14 @@ relPathDirToFileAbs from to
dotdots = replicate (length pfrom - numcommon) ".." dotdots = replicate (length pfrom - numcommon) ".."
numcommon = length common numcommon = length common
#ifdef mingw32_HOST_OS #ifdef mingw32_HOST_OS
normdrive = map toLower . takeWhile (/= ':') . takeDrive normdrive = map toLower . takeWhile (/= ':') . fromRawFilePath . takeDrive
#endif #endif
prop_relPathDirToFile_basics :: FilePath -> FilePath -> Bool prop_relPathDirToFile_basics :: RawFilePath -> RawFilePath -> Bool
prop_relPathDirToFile_basics from to prop_relPathDirToFile_basics from to
| null from || null to = True | B.null from || B.null to = True
| from == to = null r | from == to = B.null r
| otherwise = not (null r) | otherwise = not (B.null r)
where where
r = relPathDirToFileAbs from to r = relPathDirToFileAbs from to
@ -199,8 +212,8 @@ prop_relPathDirToFile_regressionTest = same_dir_shortcurcuits_at_difference
- location, but it's not really the same directory. - location, but it's not really the same directory.
- Code used to get this wrong. -} - Code used to get this wrong. -}
same_dir_shortcurcuits_at_difference = same_dir_shortcurcuits_at_difference =
relPathDirToFileAbs (joinPath [pathSeparator : "tmp", "r", "lll", "xxx", "yyy", "18"]) relPathDirToFileAbs (joinPath [pathSeparator `B.cons` "tmp", "r", "lll", "xxx", "yyy", "18"])
(joinPath [pathSeparator : "tmp", "r", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"]) (joinPath [pathSeparator `B.cons` "tmp", "r", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"])
== joinPath ["..", "..", "..", "..", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"] == joinPath ["..", "..", "..", "..", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"]
{- Given an original list of paths, and an expanded list derived from it, {- Given an original list of paths, and an expanded list derived from it,
@ -228,8 +241,7 @@ segmentPaths' f c (i:is) new =
(found, rest) = if length is < 100 (found, rest) = if length is < 100
then partition ini new then partition ini new
else break (not . ini) new else break (not . ini) new
ini p = i' `dirContains` fromRawFilePath (c p) ini p = i `dirContains` c p
i' = fromRawFilePath i
{- This assumes that it's cheaper to call segmentPaths on the result, {- This assumes that it's cheaper to call segmentPaths on the result,
- than it would be to run the action separately with each path. In - than it would be to run the action separately with each path. In
@ -244,9 +256,10 @@ runSegmentPaths' si c a paths = segmentPaths' si c paths <$> a paths
{- Converts paths in the home directory to use ~/ -} {- Converts paths in the home directory to use ~/ -}
relHome :: FilePath -> IO String relHome :: FilePath -> IO String
relHome path = do relHome path = do
home <- myHomeDir let path' = toRawFilePath path
return $ if dirContains home path home <- toRawFilePath <$> myHomeDir
then "~/" ++ relPathDirToFileAbs home path return $ if dirContains home path'
then fromRawFilePath ("~/" <> relPathDirToFileAbs home path')
else path else path
{- Checks if a command is available in PATH. {- Checks if a command is available in PATH.
@ -265,10 +278,10 @@ inPath command = isJust <$> searchPath command
-} -}
searchPath :: String -> IO (Maybe FilePath) searchPath :: String -> IO (Maybe FilePath)
searchPath command searchPath command
| isAbsolute command = check command | P.isAbsolute command = check command
| otherwise = getSearchPath >>= getM indir | otherwise = P.getSearchPath >>= getM indir
where where
indir d = check $ d </> command indir d = check $ d P.</> command
check f = firstM doesFileExist check f = firstM doesFileExist
#ifdef mingw32_HOST_OS #ifdef mingw32_HOST_OS
[f, f ++ ".exe"] [f, f ++ ".exe"]
@ -278,26 +291,26 @@ searchPath command
{- Checks if a filename is a unix dotfile. All files inside dotdirs {- Checks if a filename is a unix dotfile. All files inside dotdirs
- count as dotfiles. -} - count as dotfiles. -}
dotfile :: FilePath -> Bool dotfile :: RawFilePath -> Bool
dotfile file dotfile file
| f == "." = False | f == "." = False
| f == ".." = False | f == ".." = False
| f == "" = False | f == "" = False
| otherwise = "." `isPrefixOf` f || dotfile (takeDirectory file) | otherwise = "." `B.isPrefixOf` f || dotfile (takeDirectory file)
where where
f = takeFileName file f = takeFileName file
{- Similar to splitExtensions, but knows that some things in FilePaths {- Similar to splitExtensions, but knows that some things in RawFilePaths
- after a dot are too long to be extensions. -} - after a dot are too long to be extensions. -}
splitShortExtensions :: FilePath -> (FilePath, [String]) splitShortExtensions :: RawFilePath -> (RawFilePath, [B.ByteString])
splitShortExtensions = splitShortExtensions' 5 -- enough for ".jpeg" splitShortExtensions = splitShortExtensions' 5 -- enough for ".jpeg"
splitShortExtensions' :: Int -> FilePath -> (FilePath, [String]) splitShortExtensions' :: Int -> RawFilePath -> (RawFilePath, [B.ByteString])
splitShortExtensions' maxextension = go [] splitShortExtensions' maxextension = go []
where where
go c f go c f
| len > 0 && len <= maxextension && not (null base) = | len > 0 && len <= maxextension && not (B.null base) =
go (ext:c) base go (ext:c) base
| otherwise = (f, c) | otherwise = (f, c)
where where
(base, ext) = splitExtension f (base, ext) = splitExtension f
len = length ext len = B.length ext