From e219aadbab1ec33d9b727b5113477de4e8a781c2 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 28 Oct 2020 14:18:09 -0400 Subject: [PATCH] convert to RawByteString This will break a lot of stuff that uses it, but once fixed should lead to better performance. Mostly mechanical. Changes of note: * upFrom now uses isPathSeparator, which is better on Windows where there is not just one * splitShortExtensions used to take the length of a string, which would count wide unicode characters as a single character. Changing to B.length changes that. Note that, git-annex's annexMaxExtensionLength already changed to the length in bytes before this change. This function is only used in generating views, and the small behavior change should not be a problem. * relHome still uses FilePath because it didn't seem worth changing(?) This commit was sponsored by Jack Hill on Patreon. --- Utility/Path.hs | 99 ++++++++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 43 deletions(-) diff --git a/Utility/Path.hs b/Utility/Path.hs index 6f38b07c13..11e9ab00f3 100644 --- a/Utility/Path.hs +++ b/Utility/Path.hs @@ -5,6 +5,7 @@ - License: BSD-2-clause -} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE CPP #-} {-# OPTIONS_GHC -fno-warn-tabs #-} @@ -33,11 +34,15 @@ module Utility.Path ( prop_relPathDirToFile_regressionTest, ) where -import System.FilePath +import System.FilePath.ByteString +import qualified System.FilePath as P +import qualified Data.ByteString as B import Data.List import Data.Maybe #ifdef mingw32_HOST_OS import Data.Char +#else +import System.Posix.Directory.ByteString (getWorkingDirectory) #endif import Control.Applicative import Prelude @@ -52,15 +57,15 @@ import Utility.FileSystemEncoding - and removing the trailing path separator. - - On Windows, preserves whichever style of path separator might be used in - - the input FilePaths. This is done because some programs in Windows + - the input RawFilePaths. This is done because some programs in Windows - demand a particular path separator -- and which one actually varies! - - This does not guarantee that two paths that refer to the same location, - and are both relative to the same location (or both absolute) will - - yeild the same result. Run both through normalise from System.FilePath + - yeild the same result. Run both through normalise from System.RawFilePath - to ensure that. -} -simplifyPath :: FilePath -> FilePath +simplifyPath :: RawFilePath -> RawFilePath simplifyPath path = dropTrailingPathSeparator $ joinDrive drive $ joinPath $ norm [] $ splitPath path' where @@ -85,42 +90,42 @@ simplifyPath path = dropTrailingPathSeparator $ - Does not attempt to deal with edge cases or ensure security with - untrusted inputs. -} -absPathFrom :: FilePath -> FilePath -> FilePath +absPathFrom :: RawFilePath -> RawFilePath -> RawFilePath absPathFrom dir path = simplifyPath (combine dir path) {- takeDirectory "foo/bar/" is "foo/bar". This instead yields "foo" -} -parentDir :: FilePath -> FilePath +parentDir :: RawFilePath -> RawFilePath parentDir = takeDirectory . dropTrailingPathSeparator {- Just the parent directory of a path, or Nothing if the path has no - parent (ie for "/" or "." or "foo") -} -upFrom :: FilePath -> Maybe FilePath +upFrom :: RawFilePath -> Maybe RawFilePath upFrom dir | length dirs < 2 = Nothing - | otherwise = Just $ joinDrive drive $ intercalate s $ init dirs + | otherwise = Just $ joinDrive drive $ + B.intercalate (B.singleton pathSeparator) $ init dirs where -- on Unix, the drive will be "/" when the dir is absolute, -- otherwise "" (drive, path) = splitDrive dir - s = [pathSeparator] - dirs = filter (not . null) $ split s path + dirs = filter (not . B.null) $ B.splitWith isPathSeparator path -prop_upFrom_basics :: FilePath -> Bool +prop_upFrom_basics :: RawFilePath -> Bool prop_upFrom_basics dir - | null dir = True + | B.null dir = True | dir == "/" = p == Nothing | otherwise = p /= Just dir where p = upFrom dir -{- Checks if the first FilePath is, or could be said to contain the second. +{- Checks if the first RawFilePath is, or could be said to contain the second. - For example, "foo/" contains "foo/bar". Also, "foo", "./foo", "foo/" etc - are all equivilant. -} -dirContains :: FilePath -> FilePath -> Bool +dirContains :: RawFilePath -> RawFilePath -> Bool dirContains a b = a == b || a' == b' - || (addTrailingPathSeparator a') `isPrefixOf` b' + || (addTrailingPathSeparator a') `B.isPrefixOf` b' || a' == "." && normalise ("." b') == b' where a' = norm a @@ -133,14 +138,18 @@ dirContains a b = a == b - - Unlike Directory.canonicalizePath, this does not require the path - already exists. -} -absPath :: FilePath -> IO FilePath +absPath :: RawFilePath -> IO RawFilePath absPath file -- Avoid unncessarily getting the current directory when the path -- is already absolute. absPathFrom uses simplifyPath -- so also used here for consistency. | isAbsolute file = return $ simplifyPath file | otherwise = do - cwd <- getCurrentDirectory +#ifdef mingw32_HOST_OS + cwd <- toRawFilePath <$> getCurrentDirectory +#else + cwd <- getWorkingDirectory +#endif return $ absPathFrom cwd file {- Constructs a relative path from the CWD to a file. @@ -149,13 +158,17 @@ absPath file - relPathCwdToFile "/tmp/foo" == ".." - relPathCwdToFile "/tmp/foo/bar" == "" -} -relPathCwdToFile :: FilePath -> IO FilePath +relPathCwdToFile :: RawFilePath -> IO RawFilePath relPathCwdToFile f = do - c <- getCurrentDirectory +#ifdef mingw32_HOST_OS + c <- toRawFilePath <$> getCurrentDirectory +#else + c <- getWorkingDirectory +#endif relPathDirToFile c f {- Constructs a relative path from a directory to a file. -} -relPathDirToFile :: FilePath -> FilePath -> IO FilePath +relPathDirToFile :: RawFilePath -> RawFilePath -> IO RawFilePath relPathDirToFile from to = relPathDirToFileAbs <$> absPath from <*> absPath to {- This requires the first path to be absolute, and the @@ -165,7 +178,7 @@ relPathDirToFile from to = relPathDirToFileAbs <$> absPath from <*> absPath to - a relative path is not possible and the path is simply - returned as-is. -} -relPathDirToFileAbs :: FilePath -> FilePath -> FilePath +relPathDirToFileAbs :: RawFilePath -> RawFilePath -> RawFilePath relPathDirToFileAbs from to #ifdef mingw32_HOST_OS | normdrive from /= normdrive to = to @@ -181,14 +194,14 @@ relPathDirToFileAbs from to dotdots = replicate (length pfrom - numcommon) ".." numcommon = length common #ifdef mingw32_HOST_OS - normdrive = map toLower . takeWhile (/= ':') . takeDrive + normdrive = map toLower . takeWhile (/= ':') . fromRawFilePath . takeDrive #endif -prop_relPathDirToFile_basics :: FilePath -> FilePath -> Bool +prop_relPathDirToFile_basics :: RawFilePath -> RawFilePath -> Bool prop_relPathDirToFile_basics from to - | null from || null to = True - | from == to = null r - | otherwise = not (null r) + | B.null from || B.null to = True + | from == to = B.null r + | otherwise = not (B.null r) where r = relPathDirToFileAbs from to @@ -199,8 +212,8 @@ prop_relPathDirToFile_regressionTest = same_dir_shortcurcuits_at_difference - location, but it's not really the same directory. - Code used to get this wrong. -} same_dir_shortcurcuits_at_difference = - relPathDirToFileAbs (joinPath [pathSeparator : "tmp", "r", "lll", "xxx", "yyy", "18"]) - (joinPath [pathSeparator : "tmp", "r", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"]) + relPathDirToFileAbs (joinPath [pathSeparator `B.cons` "tmp", "r", "lll", "xxx", "yyy", "18"]) + (joinPath [pathSeparator `B.cons` "tmp", "r", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"]) == joinPath ["..", "..", "..", "..", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"] {- Given an original list of paths, and an expanded list derived from it, @@ -228,8 +241,7 @@ segmentPaths' f c (i:is) new = (found, rest) = if length is < 100 then partition ini new else break (not . ini) new - ini p = i' `dirContains` fromRawFilePath (c p) - i' = fromRawFilePath i + ini p = i `dirContains` c p {- This assumes that it's cheaper to call segmentPaths on the result, - than it would be to run the action separately with each path. In @@ -244,9 +256,10 @@ runSegmentPaths' si c a paths = segmentPaths' si c paths <$> a paths {- Converts paths in the home directory to use ~/ -} relHome :: FilePath -> IO String relHome path = do - home <- myHomeDir - return $ if dirContains home path - then "~/" ++ relPathDirToFileAbs home path + let path' = toRawFilePath path + home <- toRawFilePath <$> myHomeDir + return $ if dirContains home path' + then fromRawFilePath ("~/" <> relPathDirToFileAbs home path') else path {- Checks if a command is available in PATH. @@ -265,10 +278,10 @@ inPath command = isJust <$> searchPath command -} searchPath :: String -> IO (Maybe FilePath) searchPath command - | isAbsolute command = check command - | otherwise = getSearchPath >>= getM indir + | P.isAbsolute command = check command + | otherwise = P.getSearchPath >>= getM indir where - indir d = check $ d command + indir d = check $ d P. command check f = firstM doesFileExist #ifdef mingw32_HOST_OS [f, f ++ ".exe"] @@ -278,26 +291,26 @@ searchPath command {- Checks if a filename is a unix dotfile. All files inside dotdirs - count as dotfiles. -} -dotfile :: FilePath -> Bool +dotfile :: RawFilePath -> Bool dotfile file | f == "." = False | f == ".." = False | f == "" = False - | otherwise = "." `isPrefixOf` f || dotfile (takeDirectory file) + | otherwise = "." `B.isPrefixOf` f || dotfile (takeDirectory file) where f = takeFileName file -{- Similar to splitExtensions, but knows that some things in FilePaths +{- Similar to splitExtensions, but knows that some things in RawFilePaths - after a dot are too long to be extensions. -} -splitShortExtensions :: FilePath -> (FilePath, [String]) +splitShortExtensions :: RawFilePath -> (RawFilePath, [B.ByteString]) splitShortExtensions = splitShortExtensions' 5 -- enough for ".jpeg" -splitShortExtensions' :: Int -> FilePath -> (FilePath, [String]) +splitShortExtensions' :: Int -> RawFilePath -> (RawFilePath, [B.ByteString]) splitShortExtensions' maxextension = go [] where go c f - | len > 0 && len <= maxextension && not (null base) = + | len > 0 && len <= maxextension && not (B.null base) = go (ext:c) base | otherwise = (f, c) where (base, ext) = splitExtension f - len = length ext + len = B.length ext