From 6a97ff6b3a5ffacfe889abd6c23325a0f4f1dfc9 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 25 Nov 2019 16:18:19 -0400 Subject: [PATCH] wip RawFilePath Goal is to make git-annex faster by using ByteString for all the worktree traversal. For now, this is focusing on Command.Find, in order to benchmark how much it helps. (All other commands are temporarily disabled) Currently in a very bad unbuildable in-between state. --- Annex/Locations.hs | 6 ++- CmdLine/GitAnnex.hs | 11 ++++- CmdLine/Seek.hs | 26 +++++------ Git/Branch.hs | 19 ++++---- Git/CatFile.hs | 12 ++--- Git/Command.hs | 32 ++++++++------ Git/Construct.hs | 2 +- Git/DiffTree.hs | 4 +- Git/FilePath.hs | 20 ++++----- Git/Filename.hs | 38 +++++++++------- Git/HashObject.hs | 2 +- Git/LsFiles.hs | 105 +++++++++++++++++++++++--------------------- Git/LsTree.hs | 47 ++++++++++++-------- Git/Ref.hs | 20 ++++++--- Git/RefLog.hs | 2 +- Git/Repair.hs | 4 +- Git/Status.hs | 2 +- Git/Tree.hs | 10 ++--- Git/Types.hs | 21 +++++---- Git/UnionMerge.hs | 5 ++- Git/UpdateIndex.hs | 37 +++++++++------- Types/Export.hs | 19 ++++---- Types/Import.hs | 4 +- Utility/Misc.hs | 7 +++ git-annex.cabal | 3 +- 25 files changed, 258 insertions(+), 200 deletions(-) diff --git a/Annex/Locations.hs b/Annex/Locations.hs index f9c65732e8..cdaa8d3f6e 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -201,8 +201,10 @@ gitAnnexLink file key r config = do | not (coreSymlinks config) && needsSubmoduleFixup r = absNormPathUnix currdir $ Git.repoPath r ".git" | otherwise = Git.localGitDir r - absNormPathUnix d p = toInternalGitPath $ - absPathFrom (toInternalGitPath d) (toInternalGitPath p) + absNormPathUnix d p = fromRawFilePath $ toInternalGitPath $ toRawFilePath $ + absPathFrom + (fromRawFilePath $ toInternalGitPath $ toRawFilePath d) + (fromRawFilePath $ toInternalGitPath $ toRawFilePath p) {- Calculates a symlink target as would be used in a typical git - repository, with .git in the top of the work tree. -} diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index 9fc0d272ae..bfec6f6271 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -18,6 +18,7 @@ import Annex.Multicast import Types.Test import Types.Benchmark +{- import qualified Command.Help import qualified Command.Add import qualified Command.Unannex @@ -66,7 +67,9 @@ import qualified Command.Unlock import qualified Command.Lock import qualified Command.PreCommit import qualified Command.PostReceive +-} import qualified Command.Find +{- import qualified Command.FindRef import qualified Command.Whereis import qualified Command.List @@ -122,10 +125,11 @@ import qualified Command.Test import qualified Command.FuzzTest import qualified Command.TestRemote import qualified Command.Benchmark +-} cmds :: Parser TestOptions -> TestRunner -> MkBenchmarkGenerator -> [Command] cmds testoptparser testrunner mkbenchmarkgenerator = - [ Command.Help.cmd +{- [ Command.Help.cmd , Command.Add.cmd , Command.Get.cmd , Command.Drop.cmd @@ -196,7 +200,9 @@ cmds testoptparser testrunner mkbenchmarkgenerator = , Command.Unused.cmd , Command.DropUnused.cmd , Command.AddUnused.cmd - , Command.Find.cmd +-} + [ Command.Find.cmd +{- , Command.FindRef.cmd , Command.Whereis.cmd , Command.List.cmd @@ -231,6 +237,7 @@ cmds testoptparser testrunner mkbenchmarkgenerator = , Command.TestRemote.cmd , Command.Benchmark.cmd $ mkbenchmarkgenerator $ cmds testoptparser testrunner (\_ _ -> return noop) +-} ] run :: Parser TestOptions -> TestRunner -> MkBenchmarkGenerator -> [String] -> IO () diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 4107e9dcd6..e523eac99a 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -34,11 +34,11 @@ import Annex.Content import Annex.InodeSentinal import qualified Database.Keys -withFilesInGit :: (FilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek +withFilesInGit :: (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek withFilesInGit a l = seekActions $ prepFiltered a $ seekHelper LsFiles.inRepo l -withFilesInGitNonRecursive :: String -> (FilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek +withFilesInGitNonRecursive :: String -> (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek withFilesInGitNonRecursive needforce a l = ifM (Annex.getState Annex.force) ( withFilesInGit a l , if null l @@ -58,7 +58,7 @@ withFilesInGitNonRecursive needforce a l = ifM (Annex.getState Annex.force) getfiles c ps _ -> giveup needforce -withFilesNotInGit :: Bool -> (FilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek +withFilesNotInGit :: Bool -> (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek withFilesNotInGit skipdotfiles a l | skipdotfiles = do {- dotfiles are not acted on unless explicitly listed -} @@ -78,7 +78,7 @@ withFilesNotInGit skipdotfiles a l go fs = seekActions $ prepFiltered a $ return $ concat $ segmentPaths (map (\(WorkTreeItem f) -> f) l) fs -withPathContents :: ((FilePath, FilePath) -> CommandSeek) -> CmdParams -> CommandSeek +withPathContents :: ((RawFilePath, RawFilePath) -> CommandSeek) -> CmdParams -> CommandSeek withPathContents a params = do matcher <- Limit.getMatcher forM_ params $ \p -> do @@ -110,30 +110,30 @@ withPairs a params = seekActions $ return $ map a $ pairs [] params pairs c (x:y:xs) = pairs ((x,y):c) xs pairs _ _ = giveup "expected pairs" -withFilesToBeCommitted :: (FilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek +withFilesToBeCommitted :: (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek withFilesToBeCommitted a l = seekActions $ prepFiltered a $ seekHelper LsFiles.stagedNotDeleted l -isOldUnlocked :: FilePath -> Annex Bool +isOldUnlocked :: RawFilePath -> Annex Bool isOldUnlocked f = liftIO (notSymlink f) <&&> (isJust <$> catKeyFile f <||> isJust <$> catKeyFileHEAD f) {- unlocked pointer files that are staged, and whose content has not been - modified-} -withUnmodifiedUnlockedPointers :: (FilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek +withUnmodifiedUnlockedPointers :: (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek withUnmodifiedUnlockedPointers a l = seekActions $ prepFiltered a unlockedfiles where unlockedfiles = filterM isUnmodifiedUnlocked =<< seekHelper LsFiles.typeChangedStaged l -isUnmodifiedUnlocked :: FilePath -> Annex Bool +isUnmodifiedUnlocked :: RawFilePath -> Annex Bool isUnmodifiedUnlocked f = catKeyFile f >>= \case Nothing -> return False Just k -> sameInodeCache f =<< Database.Keys.getInodeCaches k {- Finds files that may be modified. -} -withFilesMaybeModified :: (FilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek +withFilesMaybeModified :: (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek withFilesMaybeModified a params = seekActions $ prepFiltered a $ seekHelper LsFiles.modified params @@ -225,7 +225,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction params = do forM_ ts $ \(t, i) -> keyaction (transferKey t, mkActionItem (t, i)) -prepFiltered :: (FilePath -> CommandSeek) -> Annex [FilePath] -> Annex [CommandSeek] +prepFiltered :: (RawFilePath -> CommandSeek) -> Annex [RawFilePath] -> Annex [CommandSeek] prepFiltered a fs = do matcher <- Limit.getMatcher map (process matcher) <$> fs @@ -235,7 +235,7 @@ prepFiltered a fs = do seekActions :: Annex [CommandSeek] -> Annex () seekActions gen = sequence_ =<< gen -seekHelper :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> [WorkTreeItem] -> Annex [FilePath] +seekHelper :: ([RawFilePath] -> Git.Repo -> IO ([RawFilePath], IO Bool)) -> [WorkTreeItem] -> Annex [RawFilePath] seekHelper a l = inRepo $ \g -> concat . concat <$> forM (segmentXargsOrdered l') (runSegmentPaths (\fs -> Git.Command.leaveZombie <$> a fs g)) @@ -243,7 +243,7 @@ seekHelper a l = inRepo $ \g -> l' = map (\(WorkTreeItem f) -> f) l -- An item in the work tree, which may be a file or a directory. -newtype WorkTreeItem = WorkTreeItem FilePath +newtype WorkTreeItem = WorkTreeItem RawFilePath -- When in an adjusted branch that hides some files, it may not exist -- in the current work tree, but in the original branch. This allows @@ -273,5 +273,5 @@ workTreeItems' (AllowHidden allowhidden) ps = do isJust <$> catObjectMetaDataHidden f currbranch | otherwise = return False -notSymlink :: FilePath -> IO Bool +notSymlink :: RawFilePath -> IO Bool notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f diff --git a/Git/Branch.hs b/Git/Branch.hs index 2de6f9e0fd..ffd9a189d2 100644 --- a/Git/Branch.hs +++ b/Git/Branch.hs @@ -16,6 +16,8 @@ import Git.Command import qualified Git.Config import qualified Git.Ref +import qualified Data.ByteString as B + {- The currently checked out branch. - - In a just initialized git repo before the first commit, @@ -29,19 +31,19 @@ current r = do case v of Nothing -> return Nothing Just branch -> - ifM (null <$> pipeReadStrict [Param "show-ref", Param $ fromRef branch] r) + ifM (B.null <$> pipeReadStrict [Param "show-ref", Param $ fromRef branch] r) ( return Nothing , return v ) {- The current branch, which may not really exist yet. -} currentUnsafe :: Repo -> IO (Maybe Branch) -currentUnsafe r = parse . firstLine +currentUnsafe r = parse . firstLine' <$> pipeReadStrict [Param "symbolic-ref", Param "-q", Param $ fromRef Git.Ref.headRef] r where - parse l - | null l = Nothing - | otherwise = Just $ Git.Ref l + parse b + | B.null b = Nothing + | otherwise = Just $ Git.Ref $ decodeBS b {- Checks if the second branch has any commits not present on the first - branch. -} @@ -53,7 +55,8 @@ changed origbranch newbranch repo where changed' :: Branch -> Branch -> [CommandParam] -> Repo -> IO String -changed' origbranch newbranch extraps repo = pipeReadStrict ps repo +changed' origbranch newbranch extraps repo = + decodeBS <$> pipeReadStrict ps repo where ps = [ Param "log" @@ -72,7 +75,7 @@ changedCommits origbranch newbranch extraps repo = - - This requires there to be a path from the old to the new. -} fastForwardable :: Ref -> Ref -> Repo -> IO Bool -fastForwardable old new repo = not . null <$> +fastForwardable old new repo = not . B.null <$> pipeReadStrict [ Param "log" , Param $ fromRef old ++ ".." ++ fromRef new @@ -160,7 +163,7 @@ commitCommand' runner commitmode ps = runner $ commit :: CommitMode -> Bool -> String -> Branch -> [Ref] -> Repo -> IO (Maybe Sha) commit commitmode allowempty message branch parentrefs repo = do tree <- getSha "write-tree" $ - pipeReadStrict [Param "write-tree"] repo + decodeBS' <$> pipeReadStrict [Param "write-tree"] repo ifM (cancommit tree) ( do sha <- commitTree commitmode message parentrefs tree repo diff --git a/Git/CatFile.hs b/Git/CatFile.hs index 49b89454c9..732c18a643 100644 --- a/Git/CatFile.hs +++ b/Git/CatFile.hs @@ -66,13 +66,13 @@ catFileStop h = do CoProcess.stop (checkFileProcess h) {- Reads a file from a specified branch. -} -catFile :: CatFileHandle -> Branch -> FilePath -> IO L.ByteString +catFile :: CatFileHandle -> Branch -> RawFilePath -> IO L.ByteString catFile h branch file = catObject h $ Ref $ - fromRef branch ++ ":" ++ toInternalGitPath file + fromRef branch ++ ":" ++ fromRawFilePath (toInternalGitPath file) -catFileDetails :: CatFileHandle -> Branch -> FilePath -> IO (Maybe (L.ByteString, Sha, ObjectType)) +catFileDetails :: CatFileHandle -> Branch -> RawFilePath -> IO (Maybe (L.ByteString, Sha, ObjectType)) catFileDetails h branch file = catObjectDetails h $ Ref $ - fromRef branch ++ ":" ++ toInternalGitPath file + fromRef branch ++ ":" ++ fromRawFilePath (toInternalGitPath file) {- Uses a running git cat-file read the content of an object. - Objects that do not exist will have "" returned. -} @@ -148,7 +148,7 @@ parseResp object l | otherwise = case words l of [sha, objtype, size] | length sha == shaSize -> - case (readObjectType objtype, reads size) of + case (readObjectType (encodeBS objtype), reads size) of (Just t, [(bytes, "")]) -> Just $ ParsedResp (Ref sha) bytes t _ -> Nothing @@ -185,7 +185,7 @@ querySize r repo = maybe Nothing (readMaybe . takeWhile (/= '\n')) <$> querySingle (Param "-s") r repo hGetContentsStrict queryObjectType :: Ref -> Repo -> IO (Maybe ObjectType) -queryObjectType r repo = maybe Nothing (readObjectType . takeWhile (/= '\n')) +queryObjectType r repo = maybe Nothing (readObjectType . encodeBS . takeWhile (/= '\n')) <$> querySingle (Param "-t") r repo hGetContentsStrict queryContent :: Ref -> Repo -> IO (Maybe L.ByteString) diff --git a/Git/Command.hs b/Git/Command.hs index 3101acd79e..12f69b6201 100644 --- a/Git/Command.hs +++ b/Git/Command.hs @@ -14,6 +14,9 @@ import Git import Git.Types import qualified Utility.CoProcess as CoProcess +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString as S + {- Constructs a git command line operating on the specified repo. -} gitCommandLine :: [CommandParam] -> Repo -> [CommandParam] gitCommandLine params r@(Repo { location = l@(Local { } ) }) = @@ -50,10 +53,10 @@ runQuiet params repo = withQuietOutput createProcessSuccess $ - read, that will wait on the command, and - return True if it succeeded. Failure to wait will result in zombies. -} -pipeReadLazy :: [CommandParam] -> Repo -> IO (String, IO Bool) +pipeReadLazy :: [CommandParam] -> Repo -> IO (L.ByteString, IO Bool) pipeReadLazy params repo = assertLocal repo $ do (_, Just h, _, pid) <- createProcess p { std_out = CreatePipe } - c <- hGetContents h + c <- L.hGetContents h return (c, checkSuccessProcess pid) where p = gitCreateProcess params repo @@ -62,8 +65,8 @@ pipeReadLazy params repo = assertLocal repo $ do - - Nonzero exit status is ignored. -} -pipeReadStrict :: [CommandParam] -> Repo -> IO String -pipeReadStrict = pipeReadStrict' hGetContentsStrict +pipeReadStrict :: [CommandParam] -> Repo -> IO S.ByteString +pipeReadStrict = pipeReadStrict' S.hGetContents {- The reader action must be strict. -} pipeReadStrict' :: (Handle -> IO a) -> [CommandParam] -> Repo -> IO a @@ -93,21 +96,24 @@ pipeWrite params repo = assertLocal repo $ {- Reads null terminated output of a git command (as enabled by the -z - parameter), and splits it. -} -pipeNullSplit :: [CommandParam] -> Repo -> IO ([String], IO Bool) +pipeNullSplit :: [CommandParam] -> Repo -> IO ([L.ByteString], IO Bool) pipeNullSplit params repo = do (s, cleanup) <- pipeReadLazy params repo - return (filter (not . null) $ splitc sep s, cleanup) - where - sep = '\0' + return (filter (not . L.null) $ L.split 0 s, cleanup) -pipeNullSplitStrict :: [CommandParam] -> Repo -> IO [String] +{- Reads lazily, but converts each part to a strict ByteString for + - convenience. -} +pipeNullSplit' :: [CommandParam] -> Repo -> IO ([S.ByteString], IO Bool) +pipeNullSplit' params repo = do + (s, cleanup) <- pipeNullSplit params repo + return (map L.toStrict s, cleanup) + +pipeNullSplitStrict :: [CommandParam] -> Repo -> IO [S.ByteString] pipeNullSplitStrict params repo = do s <- pipeReadStrict params repo - return $ filter (not . null) $ splitc sep s - where - sep = '\0' + return $ filter (not . S.null) $ S.split 0 s -pipeNullSplitZombie :: [CommandParam] -> Repo -> IO [String] +pipeNullSplitZombie :: [CommandParam] -> Repo -> IO [L.ByteString] pipeNullSplitZombie params repo = leaveZombie <$> pipeNullSplit params repo {- Doesn't run the cleanup action. A zombie results. -} diff --git a/Git/Construct.hs b/Git/Construct.hs index d032c59c39..7191f33036 100644 --- a/Git/Construct.hs +++ b/Git/Construct.hs @@ -58,7 +58,7 @@ fromPath dir = fromAbsPath =<< absPath dir - specified. -} fromAbsPath :: FilePath -> IO Repo fromAbsPath dir - | absoluteGitPath dir = hunt + | absoluteGitPath (encodeBS dir) = hunt | otherwise = error $ "internal error, " ++ dir ++ " is not absolute" where diff --git a/Git/DiffTree.hs b/Git/DiffTree.hs index 0aad4db188..f6c5c60955 100644 --- a/Git/DiffTree.hs +++ b/Git/DiffTree.hs @@ -89,7 +89,7 @@ commitDiff ref = getdiff (Param "show") getdiff :: CommandParam -> [CommandParam] -> Repo -> IO ([DiffTreeItem], IO Bool) getdiff command params repo = do (diff, cleanup) <- pipeNullSplit ps repo - return (parseDiffRaw diff, cleanup) + return (parseDiffRaw (map decodeBL diff), cleanup) where ps = command : @@ -113,7 +113,7 @@ parseDiffRaw l = go l , srcsha = fromMaybe (error "bad srcsha") $ extractSha ssha , dstsha = fromMaybe (error "bad dstsha") $ extractSha dsha , status = s - , file = asTopFilePath $ fromInternalGitPath $ Git.Filename.decode f + , file = asTopFilePath $ fromRawFilePath $ fromInternalGitPath $ Git.Filename.decode $ toRawFilePath f } where readmode = fst . Prelude.head . readOct diff --git a/Git/FilePath.hs b/Git/FilePath.hs index f0c3b69ed7..fffbea98d4 100644 --- a/Git/FilePath.hs +++ b/Git/FilePath.hs @@ -34,7 +34,7 @@ import qualified System.FilePath.Posix import GHC.Generics import Control.DeepSeq -{- A FilePath, relative to the top of the git repository. -} +{- A RawFilePath, relative to the top of the git repository. -} newtype TopFilePath = TopFilePath { getTopFilePath :: FilePath } deriving (Show, Eq, Ord, Generic) @@ -46,7 +46,7 @@ data BranchFilePath = BranchFilePath Ref TopFilePath {- Git uses the branch:file form to refer to a BranchFilePath -} descBranchFilePath :: BranchFilePath -> String -descBranchFilePath (BranchFilePath b f) = fromRef b ++ ':' : getTopFilePath f +descBranchFilePath (BranchFilePath b f) = fromRef b ++ ':' : (getTopFilePath f) {- Path to a TopFilePath, within the provided git repo. -} fromTopFilePath :: TopFilePath -> Git.Repo -> FilePath @@ -68,25 +68,25 @@ asTopFilePath file = TopFilePath file - despite Windows using '\'. - -} -type InternalGitPath = String +type InternalGitPath = RawFilePath -toInternalGitPath :: FilePath -> InternalGitPath +toInternalGitPath :: RawFilePath -> InternalGitPath #ifndef mingw32_HOST_OS toInternalGitPath = id #else -toInternalGitPath = replace "\\" "/" +toInternalGitPath = encodeBS . replace "\\" "/" . decodeBS #endif -fromInternalGitPath :: InternalGitPath -> FilePath +fromInternalGitPath :: InternalGitPath -> RawFilePath #ifndef mingw32_HOST_OS fromInternalGitPath = id #else -fromInternalGitPath = replace "/" "\\" +fromInternalGitPath = encodeBS . replace "/" "\\" . decodeBS #endif {- isAbsolute on Windows does not think "/foo" or "\foo" is absolute, - so try posix paths. -} -absoluteGitPath :: FilePath -> Bool -absoluteGitPath p = isAbsolute p || - System.FilePath.Posix.isAbsolute (toInternalGitPath p) +absoluteGitPath :: RawFilePath -> Bool +absoluteGitPath p = isAbsolute (decodeBS p) || + System.FilePath.Posix.isAbsolute (decodeBS (toInternalGitPath p)) diff --git a/Git/Filename.hs b/Git/Filename.hs index 40a449c9c9..52dce828e3 100644 --- a/Git/Filename.hs +++ b/Git/Filename.hs @@ -12,23 +12,29 @@ import Common import Utility.Format (decode_c, encode_c) import Data.Char +import Data.Word +import qualified Data.ByteString as S -decode :: String -> FilePath -decode [] = [] -decode f@(c:s) - -- encoded strings will be inside double quotes - | c == '"' && end s == ['"'] = decode_c $ beginning s - | otherwise = f +-- encoded filenames will be inside double quotes +decode :: S.ByteString -> RawFilePath +decode b = case S.uncons b of + Nothing -> b + Just (h, t) + | h /= q -> b + | otherwise -> case S.unsnoc t of + Nothing -> b + Just (i, l) + | l /= q -> b + | otherwise -> + encodeBS $ decode_c $ decodeBS i + where + q :: Word8 + q = fromIntegral (ord '"') {- Should not need to use this, except for testing decode. -} -encode :: FilePath -> String -encode s = "\"" ++ encode_c s ++ "\"" +encode :: RawFilePath -> S.ByteString +encode s = encodeBS $ "\"" ++ encode_c (decodeBS s) ++ "\"" -{- For quickcheck. - - - - See comment on Utility.Format.prop_encode_c_decode_c_roundtrip for - - why this only tests chars < 256 -} -prop_encode_decode_roundtrip :: String -> Bool -prop_encode_decode_roundtrip s = s' == decode (encode s') - where - s' = filter (\c -> ord c < 256) s +{- For quickcheck. -} +prop_encode_decode_roundtrip :: RawFilePath -> Bool +prop_encode_decode_roundtrip s = s == decode (encode s) diff --git a/Git/HashObject.hs b/Git/HashObject.hs index 2453cc1c5f..605e6d504c 100644 --- a/Git/HashObject.hs +++ b/Git/HashObject.hs @@ -73,4 +73,4 @@ hashObject' objtype writer repo = getSha subcmd $ pipeWriteRead (map Param params) (Just writer) repo where subcmd = "hash-object" - params = [subcmd, "-t", show objtype, "-w", "--stdin", "--no-filters"] + params = [subcmd, "-t", decodeBS (fmtObjectType objtype), "-w", "--stdin", "--no-filters"] diff --git a/Git/LsFiles.hs b/Git/LsFiles.hs index d27146c890..160c0c1ec1 100644 --- a/Git/LsFiles.hs +++ b/Git/LsFiles.hs @@ -34,37 +34,40 @@ import Git.Sha import Numeric import System.Posix.Types +import qualified Data.ByteString.Lazy as L {- Scans for files that are checked into git's index at the specified locations. -} -inRepo :: [FilePath] -> Repo -> IO ([FilePath], IO Bool) +inRepo :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) inRepo = inRepo' [] -inRepo' :: [CommandParam] -> [FilePath] -> Repo -> IO ([FilePath], IO Bool) -inRepo' ps l = pipeNullSplit $ - Param "ls-files" : - Param "--cached" : - Param "-z" : - ps ++ - (Param "--" : map File l) +inRepo' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) +inRepo' ps l repo = pipeNullSplit' params repo + where + params = + Param "ls-files" : + Param "--cached" : + Param "-z" : + ps ++ + (Param "--" : map (File . fromRawFilePath) l) {- Files that are checked into the index or have been committed to a - branch. -} -inRepoOrBranch :: Branch -> [FilePath] -> Repo -> IO ([FilePath], IO Bool) +inRepoOrBranch :: Branch -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) inRepoOrBranch (Ref b) = inRepo' [Param $ "--with-tree=" ++ b] {- Scans for files at the specified locations that are not checked into git. -} -notInRepo :: Bool -> [FilePath] -> Repo -> IO ([FilePath], IO Bool) +notInRepo :: Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) notInRepo = notInRepo' [] -notInRepo' :: [CommandParam] -> Bool -> [FilePath] -> Repo -> IO ([FilePath], IO Bool) -notInRepo' ps include_ignored l repo = pipeNullSplit params repo +notInRepo' :: [CommandParam] -> Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) +notInRepo' ps include_ignored l repo = pipeNullSplit' params repo where params = concat [ [ Param "ls-files", Param "--others"] , ps , exclude , [ Param "-z", Param "--" ] - , map File l + , map (File . fromRawFilePath) l ] exclude | include_ignored = [] @@ -72,48 +75,48 @@ notInRepo' ps include_ignored l repo = pipeNullSplit params repo {- Scans for files at the specified locations that are not checked into - git. Empty directories are included in the result. -} -notInRepoIncludingEmptyDirectories :: Bool -> [FilePath] -> Repo -> IO ([FilePath], IO Bool) +notInRepoIncludingEmptyDirectories :: Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) notInRepoIncludingEmptyDirectories = notInRepo' [Param "--directory"] {- Finds all files in the specified locations, whether checked into git or - not. -} -allFiles :: [FilePath] -> Repo -> IO ([FilePath], IO Bool) -allFiles l = pipeNullSplit $ +allFiles :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) +allFiles l = pipeNullSplit' $ Param "ls-files" : Param "--cached" : Param "--others" : Param "-z" : Param "--" : - map File l + map (File . fromRawFilePath) l {- Returns a list of files in the specified locations that have been - deleted. -} -deleted :: [FilePath] -> Repo -> IO ([FilePath], IO Bool) -deleted l repo = pipeNullSplit params repo +deleted :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) +deleted l repo = pipeNullSplit' params repo where params = Param "ls-files" : Param "--deleted" : Param "-z" : Param "--" : - map File l + map (File . fromRawFilePath) l {- Returns a list of files in the specified locations that have been - modified. -} -modified :: [FilePath] -> Repo -> IO ([FilePath], IO Bool) -modified l repo = pipeNullSplit params repo +modified :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) +modified l repo = pipeNullSplit' params repo where params = Param "ls-files" : Param "--modified" : Param "-z" : Param "--" : - map File l + map (File . fromRawFilePath) l {- Files that have been modified or are not checked into git (and are not - ignored). -} -modifiedOthers :: [FilePath] -> Repo -> IO ([FilePath], IO Bool) -modifiedOthers l repo = pipeNullSplit params repo +modifiedOthers :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) +modifiedOthers l repo = pipeNullSplit' params repo where params = Param "ls-files" : @@ -122,69 +125,69 @@ modifiedOthers l repo = pipeNullSplit params repo Param "--exclude-standard" : Param "-z" : Param "--" : - map File l + map (File . fromRawFilePath) l {- Returns a list of all files that are staged for commit. -} -staged :: [FilePath] -> Repo -> IO ([FilePath], IO Bool) +staged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) staged = staged' [] {- Returns a list of the files, staged for commit, that are being added, - moved, or changed (but not deleted), from the specified locations. -} -stagedNotDeleted :: [FilePath] -> Repo -> IO ([FilePath], IO Bool) +stagedNotDeleted :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) stagedNotDeleted = staged' [Param "--diff-filter=ACMRT"] -staged' :: [CommandParam] -> [FilePath] -> Repo -> IO ([FilePath], IO Bool) -staged' ps l = pipeNullSplit $ prefix ++ ps ++ suffix +staged' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) +staged' ps l repo = pipeNullSplit' (prefix ++ ps ++ suffix) repo where prefix = [Param "diff", Param "--cached", Param "--name-only", Param "-z"] - suffix = Param "--" : map File l + suffix = Param "--" : map (File . fromRawFilePath) l -type StagedDetails = (FilePath, Maybe Sha, Maybe FileMode) +type StagedDetails = (RawFilePath, Maybe Sha, Maybe FileMode) {- Returns details about files that are staged in the index, - as well as files not yet in git. Skips ignored files. -} -stagedOthersDetails :: [FilePath] -> Repo -> IO ([StagedDetails], IO Bool) +stagedOthersDetails :: [RawFilePath] -> Repo -> IO ([StagedDetails], IO Bool) stagedOthersDetails = stagedDetails' [Param "--others", Param "--exclude-standard"] {- Returns details about all files that are staged in the index. -} -stagedDetails :: [FilePath] -> Repo -> IO ([StagedDetails], IO Bool) +stagedDetails :: [RawFilePath] -> Repo -> IO ([StagedDetails], IO Bool) stagedDetails = stagedDetails' [] {- Gets details about staged files, including the Sha of their staged - contents. -} -stagedDetails' :: [CommandParam] -> [FilePath] -> Repo -> IO ([StagedDetails], IO Bool) +stagedDetails' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([StagedDetails], IO Bool) stagedDetails' ps l repo = do (ls, cleanup) <- pipeNullSplit params repo return (map parse ls, cleanup) where params = Param "ls-files" : Param "--stage" : Param "-z" : ps ++ - Param "--" : map File l + Param "--" : map (File . fromRawFilePath) l parse s - | null file = (s, Nothing, Nothing) - | otherwise = (file, extractSha $ take shaSize rest, readmode mode) + | null file = (L.toStrict s, Nothing, Nothing) + | otherwise = (toRawFilePath file, extractSha $ take shaSize rest, readmode mode) where - (metadata, file) = separate (== '\t') s + (metadata, file) = separate (== '\t') (decodeBL' s) (mode, rest) = separate (== ' ') metadata readmode = fst <$$> headMaybe . readOct {- Returns a list of the files in the specified locations that are staged - for commit, and whose type has changed. -} -typeChangedStaged :: [FilePath] -> Repo -> IO ([FilePath], IO Bool) +typeChangedStaged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) typeChangedStaged = typeChanged' [Param "--cached"] {- Returns a list of the files in the specified locations whose type has - changed. Files only staged for commit will not be included. -} -typeChanged :: [FilePath] -> Repo -> IO ([FilePath], IO Bool) +typeChanged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) typeChanged = typeChanged' [] -typeChanged' :: [CommandParam] -> [FilePath] -> Repo -> IO ([FilePath], IO Bool) +typeChanged' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) typeChanged' ps l repo = do (fs, cleanup) <- pipeNullSplit (prefix ++ ps ++ suffix) repo -- git diff returns filenames relative to the top of the git repo; -- convert to filenames relative to the cwd, like git ls-files. top <- absPath (repoPath repo) currdir <- getCurrentDirectory - return (map (\f -> relPathDirToFileAbs currdir $ top f) fs, cleanup) + return (map (\f -> toRawFilePath (relPathDirToFileAbs currdir $ top decodeBL' f)) fs, cleanup) where prefix = [ Param "diff" @@ -192,7 +195,7 @@ typeChanged' ps l repo = do , Param "--diff-filter=T" , Param "-z" ] - suffix = Param "--" : (if null l then [File "."] else map File l) + suffix = Param "--" : (if null l then [File "."] else map (File . fromRawFilePath) l) {- A item in conflict has two possible values. - Either can be Nothing, when that side deleted the file. -} @@ -202,7 +205,7 @@ data Conflicting v = Conflicting } deriving (Show) data Unmerged = Unmerged - { unmergedFile :: FilePath + { unmergedFile :: RawFilePath , unmergedTreeItemType :: Conflicting TreeItemType , unmergedSha :: Conflicting Sha } @@ -217,21 +220,21 @@ data Unmerged = Unmerged - 3 = them - If a line is omitted, that side removed the file. -} -unmerged :: [FilePath] -> Repo -> IO ([Unmerged], IO Bool) +unmerged :: [RawFilePath] -> Repo -> IO ([Unmerged], IO Bool) unmerged l repo = do (fs, cleanup) <- pipeNullSplit params repo - return (reduceUnmerged [] $ catMaybes $ map parseUnmerged fs, cleanup) + return (reduceUnmerged [] $ catMaybes $ map (parseUnmerged . decodeBL') fs, cleanup) where params = Param "ls-files" : Param "--unmerged" : Param "-z" : Param "--" : - map File l + map (File . fromRawFilePath) l data InternalUnmerged = InternalUnmerged { isus :: Bool - , ifile :: FilePath + , ifile :: RawFilePath , itreeitemtype :: Maybe TreeItemType , isha :: Maybe Sha } @@ -245,9 +248,9 @@ parseUnmerged s if stage /= 2 && stage /= 3 then Nothing else do - treeitemtype <- readTreeItemType rawtreeitemtype + treeitemtype <- readTreeItemType (encodeBS rawtreeitemtype) sha <- extractSha rawsha - return $ InternalUnmerged (stage == 2) file + return $ InternalUnmerged (stage == 2) (toRawFilePath file) (Just treeitemtype) (Just sha) _ -> Nothing where diff --git a/Git/LsTree.hs b/Git/LsTree.hs index 8ca805402b..aa3651a543 100644 --- a/Git/LsTree.hs +++ b/Git/LsTree.hs @@ -26,12 +26,16 @@ import Git.FilePath import qualified Git.Filename import Numeric -import Data.Char +import Data.Either import System.Posix.Types +import qualified Data.ByteString as S +import qualified Data.ByteString.Lazy as L +import qualified Data.Attoparsec.ByteString.Lazy as A +import qualified Data.Attoparsec.ByteString.Char8 as A8 data TreeItem = TreeItem { mode :: FileMode - , typeobj :: String + , typeobj :: S.ByteString , sha :: Ref , file :: TopFilePath } deriving Show @@ -45,7 +49,7 @@ lsTree = lsTree' [] lsTree' :: [CommandParam] -> LsTreeMode -> Ref -> Repo -> IO ([TreeItem], IO Bool) lsTree' ps lsmode t repo = do (l, cleanup) <- pipeNullSplit (lsTreeParams lsmode t ps) repo - return (map parseLsTree l, cleanup) + return (rights (map parseLsTree l), cleanup) lsTreeParams :: LsTreeMode -> Ref -> [CommandParam] -> [CommandParam] lsTreeParams lsmode r ps = @@ -63,7 +67,8 @@ lsTreeParams lsmode r ps = {- Lists specified files in a tree. -} lsTreeFiles :: Ref -> [FilePath] -> Repo -> IO [TreeItem] -lsTreeFiles t fs repo = map parseLsTree <$> pipeNullSplitStrict ps repo +lsTreeFiles t fs repo = rights . map (parseLsTree . L.fromStrict) + <$> pipeNullSplitStrict ps repo where ps = [ Param "ls-tree" @@ -73,30 +78,34 @@ lsTreeFiles t fs repo = map parseLsTree <$> pipeNullSplitStrict ps repo , File $ fromRef t ] ++ map File fs +parseLsTree :: L.ByteString -> Either String TreeItem +parseLsTree b = case A.parse parserLsTree b of + A.Done _ r -> Right r + A.Fail _ _ err -> Left err + {- Parses a line of ls-tree output, in format: - mode SP type SP sha TAB file - - (The --long format is not currently supported.) -} -parseLsTree :: String -> TreeItem -parseLsTree l = TreeItem - { mode = smode - , typeobj = t - , sha = Ref s - , file = sfile - } - where - (m, past_m) = splitAt 7 l -- mode is 6 bytes - (!t, past_t) = separate isSpace past_m - (!s, past_s) = splitAt shaSize past_t - !f = drop 1 past_s - !smode = fst $ Prelude.head $ readOct m - !sfile = asTopFilePath $ Git.Filename.decode f +parserLsTree :: A.Parser TreeItem +parserLsTree = TreeItem + -- mode + <$> A8.decimal + <* A8.char ' ' + -- type + <*> A.takeTill (== 32) + <* A8.char ' ' + -- sha + <*> (Ref . decodeBS' <$> A.take shaSize) + <* A8.char '\t' + -- file + <*> (asTopFilePath . decodeBS' . Git.Filename.decode <$> A.takeByteString) {- Inverse of parseLsTree -} formatLsTree :: TreeItem -> String formatLsTree ti = unwords [ showOct (mode ti) "" - , typeobj ti + , decodeBS (typeobj ti) , fromRef (sha ti) , getTopFilePath (file ti) ] diff --git a/Git/Ref.hs b/Git/Ref.hs index 964dbafb08..d0542f4f84 100644 --- a/Git/Ref.hs +++ b/Git/Ref.hs @@ -5,6 +5,8 @@ - Licensed under the GNU AGPL version 3 or higher. -} +{-# LANGUAGE OverloadedStrings #-} + module Git.Ref where import Common @@ -13,7 +15,8 @@ import Git.Command import Git.Sha import Git.Types -import Data.Char (chr) +import Data.Char (chr, ord) +import qualified Data.ByteString as S headRef :: Ref headRef = Ref "HEAD" @@ -88,8 +91,10 @@ file ref repo = localGitDir repo fromRef ref - that was just created. -} headExists :: Repo -> IO Bool headExists repo = do - ls <- lines <$> pipeReadStrict [Param "show-ref", Param "--head"] repo - return $ any (" HEAD" `isSuffixOf`) ls + ls <- S.split nl <$> pipeReadStrict [Param "show-ref", Param "--head"] repo + return $ any (" HEAD" `S.isSuffixOf`) ls + where + nl = fromIntegral (ord '\n') {- Get the sha of a fully qualified git ref, if it exists. -} sha :: Branch -> Repo -> IO (Maybe Sha) @@ -100,8 +105,9 @@ sha branch repo = process <$> showref repo , Param "--hash" -- get the hash , Param $ fromRef branch ] - process [] = Nothing - process s = Just $ Ref $ firstLine s + process s + | S.null s = Nothing + | otherwise = Just $ Ref $ decodeBS' $ firstLine' s headSha :: Repo -> IO (Maybe Sha) headSha = sha headRef @@ -116,7 +122,7 @@ matchingWithHEAD refs repo = matching' ("--head" : map fromRef refs) repo {- List of (shas, branches) matching a given ref spec. -} matching' :: [String] -> Repo -> IO [(Sha, Branch)] -matching' ps repo = map gen . lines <$> +matching' ps repo = map gen . lines . decodeBS' <$> pipeReadStrict (Param "show-ref" : map Param ps) repo where gen l = let (r, b) = separate (== ' ') l @@ -148,7 +154,7 @@ delete oldvalue ref = run - The ref may be something like a branch name, and it could contain - ":subdir" if a subtree is wanted. -} tree :: Ref -> Repo -> IO (Maybe Sha) -tree (Ref ref) = extractSha <$$> pipeReadStrict +tree (Ref ref) = extractSha . decodeBS <$$> pipeReadStrict [ Param "rev-parse", Param "--verify", Param "--quiet", Param ref' ] where ref' = if ":" `isInfixOf` ref diff --git a/Git/RefLog.hs b/Git/RefLog.hs index 2c5a65d74a..7ba8713af7 100644 --- a/Git/RefLog.hs +++ b/Git/RefLog.hs @@ -21,7 +21,7 @@ getMulti :: [Branch] -> Repo -> IO [Sha] getMulti bs = get' (map (Param . fromRef) bs) get' :: [CommandParam] -> Repo -> IO [Sha] -get' ps = mapMaybe extractSha . lines <$$> pipeReadStrict ps' +get' ps = mapMaybe extractSha . lines . decodeBS <$$> pipeReadStrict ps' where ps' = catMaybes [ Just $ Param "log" diff --git a/Git/Repair.hs b/Git/Repair.hs index e6267a5f54..734c884f60 100644 --- a/Git/Repair.hs +++ b/Git/Repair.hs @@ -284,7 +284,7 @@ findUncorruptedCommit missing goodcommits branch r = do , Param "--format=%H" , Param (fromRef branch) ] r - let branchshas = catMaybes $ map extractSha ls + let branchshas = catMaybes $ map (extractSha . decodeBS) ls reflogshas <- RefLog.get branch r -- XXX Could try a bit harder here, and look -- for uncorrupted old commits in branches in the @@ -313,7 +313,7 @@ verifyCommit missing goodcommits commit r , Param "--format=%H %T" , Param (fromRef commit) ] r - let committrees = map parse ls + let committrees = map (parse . decodeBS) ls if any isNothing committrees || null committrees then do void cleanup diff --git a/Git/Status.hs b/Git/Status.hs index 5a1077baf8..c15a11bd63 100644 --- a/Git/Status.hs +++ b/Git/Status.hs @@ -69,7 +69,7 @@ parseStatusZ = go [] getStatus :: [CommandParam] -> [FilePath] -> Repo -> IO ([StagedUnstaged Status], IO Bool) getStatus ps fs r = do (ls, cleanup) <- pipeNullSplit ps' r - return (parseStatusZ ls, cleanup) + return (parseStatusZ (map decodeBL ls), cleanup) where ps' = concat [ [Param "status"] diff --git a/Git/Tree.hs b/Git/Tree.hs index 3a8851099a..8a69c53a2a 100644 --- a/Git/Tree.hs +++ b/Git/Tree.hs @@ -115,7 +115,7 @@ mkTreeOutput :: FileMode -> ObjectType -> Sha -> TopFilePath -> String mkTreeOutput fm ot s f = concat [ showOct fm "" , " " - , show ot + , decodeBS (fmtObjectType ot) , " " , fromRef s , "\t" @@ -134,7 +134,7 @@ treeItemToTreeContent (TreeItem f m s) = case toTreeItemType m of treeItemToLsTreeItem :: TreeItem -> LsTree.TreeItem treeItemToLsTreeItem (TreeItem f mode sha) = LsTree.TreeItem { LsTree.mode = mode - , LsTree.typeobj = show BlobObject + , LsTree.typeobj = fmtObjectType BlobObject , LsTree.sha = sha , LsTree.file = f } @@ -239,7 +239,7 @@ adjustTree adjusttreeitem addtreeitems resolveaddconflict removefiles r repo = Just CommitObject -> do let ti = TreeCommit (LsTree.file i) (LsTree.mode i) (LsTree.sha i) go h wasmodified (ti:c) depth intree is - _ -> error ("unexpected object type \"" ++ LsTree.typeobj i ++ "\"") + _ -> error ("unexpected object type \"" ++ decodeBS (LsTree.typeobj i) ++ "\"") | otherwise = return (c, wasmodified, i:is) adjustlist h depth ishere underhere l = do @@ -328,7 +328,7 @@ graftTree' subtree graftloc basetree repo hdl = go basetree graftdirs -- For a graftloc of "foo/bar/baz", this generates -- ["foo", "foo/bar", "foo/bar/baz"] - graftdirs = map (asTopFilePath . toInternalGitPath) $ + graftdirs = map (asTopFilePath . decodeBS . toInternalGitPath . encodeBS) $ mkpaths [] $ splitDirectories $ gitPath graftloc mkpaths _ [] = [] mkpaths base (d:rest) = (joinPath base d) : mkpaths (base ++ [d]) rest @@ -355,7 +355,7 @@ extractTree l = case go [] inTopTree l of Just CommitObject -> let c = TreeCommit (LsTree.file i) (LsTree.mode i) (LsTree.sha i) in go (c:t) intree is - _ -> parseerr ("unexpected object type \"" ++ LsTree.typeobj i ++ "\"") + _ -> parseerr ("unexpected object type \"" ++ decodeBS (LsTree.typeobj i) ++ "\"") | otherwise = Right (t, i:is) parseerr = Left diff --git a/Git/Types.hs b/Git/Types.hs index 4a4dff0c53..90401db153 100644 --- a/Git/Types.hs +++ b/Git/Types.hs @@ -5,13 +5,17 @@ - Licensed under the GNU AGPL version 3 or higher. -} +{-# LANGUAGE OverloadedStrings #-} + module Git.Types where import Network.URI import qualified Data.Map as M +import qualified Data.ByteString as S import System.Posix.Types import Utility.SafeCommand + {- Support repositories on local disk, and repositories accessed via an URL. - - Repos on local disk have a git directory, and unless bare, a worktree. @@ -64,32 +68,31 @@ newtype RefDate = RefDate String {- Types of objects that can be stored in git. -} data ObjectType = BlobObject | CommitObject | TreeObject - deriving (Eq) -instance Show ObjectType where - show BlobObject = "blob" - show CommitObject = "commit" - show TreeObject = "tree" - -readObjectType :: String -> Maybe ObjectType +readObjectType :: S.ByteString -> Maybe ObjectType readObjectType "blob" = Just BlobObject readObjectType "commit" = Just CommitObject readObjectType "tree" = Just TreeObject readObjectType _ = Nothing +fmtObjectType :: ObjectType -> S.ByteString +fmtObjectType BlobObject = "blob" +fmtObjectType CommitObject = "commit" +fmtObjectType TreeObject = "tree" + {- Types of items in a tree. -} data TreeItemType = TreeFile | TreeExecutable | TreeSymlink | TreeSubmodule deriving (Eq) {- Git uses magic numbers to denote the type of a tree item. -} -readTreeItemType :: String -> Maybe TreeItemType +readTreeItemType :: S.ByteString -> Maybe TreeItemType readTreeItemType "100644" = Just TreeFile readTreeItemType "100755" = Just TreeExecutable readTreeItemType "120000" = Just TreeSymlink readTreeItemType "160000" = Just TreeSubmodule readTreeItemType _ = Nothing -fmtTreeItemType :: TreeItemType -> String +fmtTreeItemType :: TreeItemType -> S.ByteString fmtTreeItemType TreeFile = "100644" fmtTreeItemType TreeExecutable = "100755" fmtTreeItemType TreeSymlink = "120000" diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs index ea9dd3500e..fc3c30e2ac 100644 --- a/Git/UnionMerge.hs +++ b/Git/UnionMerge.hs @@ -10,6 +10,7 @@ module Git.UnionMerge ( mergeIndex ) where +import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy.Char8 as L8 import qualified Data.Set as S @@ -69,7 +70,7 @@ diffOpts = ["--raw", "-z", "-r", "--no-renames", "-l0"] doMerge :: HashObjectHandle -> CatFileHandle -> [String] -> Repo -> Streamer doMerge hashhandle ch differ repo streamer = do (diff, cleanup) <- pipeNullSplit (map Param differ) repo - go diff + go (map decodeBL' diff) void $ cleanup where go [] = noop @@ -80,7 +81,7 @@ doMerge hashhandle ch differ repo streamer = do {- Given an info line from a git raw diff, and the filename, generates - a line suitable for update-index that union merges the two sides of the - diff. -} -mergeFile :: String -> FilePath -> HashObjectHandle -> CatFileHandle -> IO (Maybe String) +mergeFile :: String -> FilePath -> HashObjectHandle -> CatFileHandle -> IO (Maybe L.ByteString) mergeFile info file hashhandle h = case filter (/= nullSha) [Ref asha, Ref bsha] of [] -> return Nothing (sha:[]) -> use sha diff --git a/Git/UpdateIndex.hs b/Git/UpdateIndex.hs index 435c4f28e2..76094a3432 100644 --- a/Git/UpdateIndex.hs +++ b/Git/UpdateIndex.hs @@ -1,11 +1,11 @@ {- git-update-index library - - - Copyright 2011-2018 Joey Hess + - Copyright 2011-2019 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} -{-# LANGUAGE BangPatterns, CPP #-} +{-# LANGUAGE BangPatterns, OverloadedStrings, CPP #-} module Git.UpdateIndex ( Streamer, @@ -32,12 +32,14 @@ import Git.FilePath import Git.Sha import qualified Git.DiffTreeItem as Diff +import qualified Data.ByteString.Lazy as L + {- Streamers are passed a callback and should feed it lines in the form - read by update-index, and generated by ls-tree. -} -type Streamer = (String -> IO ()) -> IO () +type Streamer = (L.ByteString -> IO ()) -> IO () {- A streamer with a precalculated value. -} -pureStreamer :: String -> Streamer +pureStreamer :: L.ByteString -> Streamer pureStreamer !s = \streamer -> streamer s {- Streams content into update-index from a list of Streamers. -} @@ -49,8 +51,8 @@ data UpdateIndexHandle = UpdateIndexHandle ProcessHandle Handle streamUpdateIndex' :: UpdateIndexHandle -> Streamer -> IO () streamUpdateIndex' (UpdateIndexHandle _ h) a = a $ \s -> do - hPutStr h s - hPutStr h "\0" + L.hPutStr h s + L.hPutStr h "\0" startUpdateIndex :: Repo -> IO UpdateIndexHandle startUpdateIndex repo = do @@ -84,14 +86,13 @@ lsSubTree (Ref x) p repo streamer = do {- Generates a line suitable to be fed into update-index, to add - a given file with a given sha. -} -updateIndexLine :: Sha -> TreeItemType -> TopFilePath -> String -updateIndexLine sha treeitemtype file = concat - [ fmtTreeItemType treeitemtype - , " blob " - , fromRef sha - , "\t" - , indexPath file - ] +updateIndexLine :: Sha -> TreeItemType -> TopFilePath -> L.ByteString +updateIndexLine sha treeitemtype file = L.fromStrict $ + fmtTreeItemType treeitemtype + <> " blob " + <> encodeBS (fromRef sha) + <> "\t" + <> indexPath file stageFile :: Sha -> TreeItemType -> FilePath -> Repo -> IO Streamer stageFile sha treeitemtype file repo = do @@ -105,7 +106,11 @@ unstageFile file repo = do return $ unstageFile' p unstageFile' :: TopFilePath -> Streamer -unstageFile' p = pureStreamer $ "0 " ++ fromRef nullSha ++ "\t" ++ indexPath p +unstageFile' p = pureStreamer $ L.fromStrict $ + "0 " + <> encodeBS' (fromRef nullSha) + <> "\t" + <> indexPath p {- A streamer that adds a symlink to the index. -} stageSymlink :: FilePath -> Sha -> Repo -> IO Streamer @@ -123,7 +128,7 @@ stageDiffTreeItem d = case toTreeItemType (Diff.dstmode d) of Just t -> pureStreamer $ updateIndexLine (Diff.dstsha d) t (Diff.file d) indexPath :: TopFilePath -> InternalGitPath -indexPath = toInternalGitPath . getTopFilePath +indexPath = toInternalGitPath . toRawFilePath . getTopFilePath {- Refreshes the index, by checking file stat information. -} refreshIndex :: Repo -> ((FilePath -> IO ()) -> IO ()) -> IO Bool diff --git a/Types/Export.hs b/Types/Export.hs index 437d74e286..b90b5cbe5e 100644 --- a/Types/Export.hs +++ b/Types/Export.hs @@ -17,38 +17,39 @@ module Types.Export ( import Git.FilePath import Utility.Split +import Utility.FileSystemEncoding import qualified System.FilePath.Posix as Posix -- A location on a remote that a key can be exported to. --- The FilePath will be relative to the top of the remote, +-- The RawFilePath will be relative to the top of the remote, -- and uses unix-style path separators. -newtype ExportLocation = ExportLocation FilePath +newtype ExportLocation = ExportLocation RawFilePath deriving (Show, Eq) -mkExportLocation :: FilePath -> ExportLocation +mkExportLocation :: RawFilePath -> ExportLocation mkExportLocation = ExportLocation . toInternalGitPath -fromExportLocation :: ExportLocation -> FilePath +fromExportLocation :: ExportLocation -> RawFilePath fromExportLocation (ExportLocation f) = f -newtype ExportDirectory = ExportDirectory FilePath +newtype ExportDirectory = ExportDirectory RawFilePath deriving (Show, Eq) -mkExportDirectory :: FilePath -> ExportDirectory +mkExportDirectory :: RawFilePath -> ExportDirectory mkExportDirectory = ExportDirectory . toInternalGitPath -fromExportDirectory :: ExportDirectory -> FilePath +fromExportDirectory :: ExportDirectory -> RawFilePath fromExportDirectory (ExportDirectory f) = f -- | All subdirectories down to the ExportLocation, with the deepest ones -- last. Does not include the top of the export. exportDirectories :: ExportLocation -> [ExportDirectory] exportDirectories (ExportLocation f) = - map (ExportDirectory . Posix.joinPath . reverse) (subs [] dirs) + map (ExportDirectory . encodeBS . Posix.joinPath . reverse) (subs [] dirs) where subs _ [] = [] subs ps (d:ds) = (d:ps) : subs (d:ps) ds dirs = map Posix.dropTrailingPathSeparator $ - dropFromEnd 1 $ Posix.splitPath f + dropFromEnd 1 $ Posix.splitPath $ decodeBS f diff --git a/Types/Import.hs b/Types/Import.hs index c6d94edb61..a297af76e6 100644 --- a/Types/Import.hs +++ b/Types/Import.hs @@ -19,10 +19,10 @@ import Utility.FileSystemEncoding - location on the remote. -} type ImportLocation = ExportLocation -mkImportLocation :: FilePath -> ImportLocation +mkImportLocation :: RawFilePath -> ImportLocation mkImportLocation = mkExportLocation -fromImportLocation :: ImportLocation -> FilePath +fromImportLocation :: ImportLocation -> RawFilePath fromImportLocation = fromExportLocation {- An identifier for content stored on a remote that has been imported into diff --git a/Utility/Misc.hs b/Utility/Misc.hs index de77c949a0..2f1766ec23 100644 --- a/Utility/Misc.hs +++ b/Utility/Misc.hs @@ -12,6 +12,7 @@ module Utility.Misc ( readFileStrict, separate, firstLine, + firstLine', segment, segmentDelim, massReplace, @@ -28,6 +29,7 @@ import Data.Char import Data.List import System.Exit import Control.Applicative +import qualified Data.ByteString as S import Prelude {- A version of hgetContents that is not lazy. Ensures file is @@ -56,6 +58,11 @@ separate c l = unbreak $ break c l firstLine :: String -> String firstLine = takeWhile (/= '\n') +firstLine' :: S.ByteString -> S.ByteString +firstLine' = S.takeWhile (/= nl) + where + nl = fromIntegral (ord '\n') + {- Splits a list into segments that are delimited by items matching - a predicate. (The delimiters are not included in the segments.) - Segments may be empty. -} diff --git a/git-annex.cabal b/git-annex.cabal index 5d8ba73914..83da5bcb74 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -602,7 +602,7 @@ Executable git-annex if flag(DebugLocks) CPP-Options: -DDEBUGLOCKS - Other-Modules: + Other-Modules-Temp-Disabled: Annex Annex.Action Annex.AdjustedBranch @@ -860,7 +860,6 @@ Executable git-annex Git.RefLog Git.Remote Git.Remote.Remove - Git.Repair Git.Sha Git.Ssh Git.Status