From 279991604d1e9cf601c0e9754bcdafc98b538608 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 6 Apr 2020 17:14:49 -0400 Subject: [PATCH] started converting Ref from String to ByteString This should make code that reads shas and refs from git faster. Does not compile yet, a lot needs to be done still. --- Database/Types.hs | 5 +++-- Git/Command.hs | 9 +++++++-- Git/Credential.hs | 2 +- Git/FilePath.hs | 2 +- Git/Fsck.hs | 3 ++- Git/HashObject.hs | 3 ++- Git/History.hs | 10 +++++++--- Git/LsFiles.hs | 2 +- Git/LsTree.hs | 2 +- Git/Objects.hs | 4 ++-- Git/Ref.hs | 16 ++++++++++------ Git/RefLog.hs | 7 +++++-- Git/Sha.hs | 32 +++++++++++++++++++++++--------- Git/Types.hs | 4 ++-- Git/UpdateIndex.hs | 8 ++++---- Types/RefSpec.hs | 6 +++--- Utility/Process.hs | 16 ++++++---------- 17 files changed, 80 insertions(+), 51 deletions(-) diff --git a/Database/Types.hs b/Database/Types.hs index ac6aad2748..561972284a 100644 --- a/Database/Types.hs +++ b/Database/Types.hs @@ -28,6 +28,7 @@ import Foreign.C.Types import Key import Utility.InodeCache import Utility.FileSize +import Utility.FileSystemEncoding import Git.Types import Types.UUID import Types.Import @@ -94,10 +95,10 @@ newtype SSha = SSha String deriving (Eq, Show) toSSha :: Sha -> SSha -toSSha (Ref s) = SSha s +toSSha (Ref s) = SSha (decodeBS' s) fromSSha :: SSha -> Ref -fromSSha (SSha s) = Ref s +fromSSha (SSha s) = Ref (encodeBS' s) instance PersistField SSha where toPersistValue (SSha b) = toPersistValue b diff --git a/Git/Command.hs b/Git/Command.hs index eb20af2dc9..15157a08a5 100644 --- a/Git/Command.hs +++ b/Git/Command.hs @@ -81,11 +81,16 @@ pipeReadStrict' reader params repo = assertLocal repo $ {- Runs a git command, feeding it an input, and returning its output, - which is expected to be fairly small, since it's all read into memory - strictly. -} -pipeWriteRead :: [CommandParam] -> Maybe (Handle -> IO ()) -> Repo -> IO String +pipeWriteRead :: [CommandParam] -> Maybe (Handle -> IO ()) -> Repo -> IO S.ByteString pipeWriteRead params writer repo = assertLocal repo $ writeReadProcessEnv "git" (toCommand $ gitCommandLine params repo) - (gitEnv repo) writer (Just adjusthandle) + (gitEnv repo) writer' where + writer' = case writer of + Nothing -> Nothing + Just a -> Just $ \h -> do + adjusthandle h + a h adjusthandle h = hSetNewlineMode h noNewlineTranslation {- Runs a git command, feeding it input on a handle with an action. -} diff --git a/Git/Credential.hs b/Git/Credential.hs index 9465d27963..2f926b0323 100644 --- a/Git/Credential.hs +++ b/Git/Credential.hs @@ -58,7 +58,7 @@ urlCredential = Credential . M.singleton "url" runCredential :: String -> Credential -> Repo -> IO Credential runCredential action input r = - parseCredential <$> pipeWriteRead + parseCredential . decodeBS <$> pipeWriteRead [ Param "credential" , Param action ] diff --git a/Git/FilePath.hs b/Git/FilePath.hs index 66a015994e..ea9cceaa87 100644 --- a/Git/FilePath.hs +++ b/Git/FilePath.hs @@ -50,7 +50,7 @@ data BranchFilePath = BranchFilePath Ref TopFilePath {- Git uses the branch:file form to refer to a BranchFilePath -} descBranchFilePath :: BranchFilePath -> S.ByteString descBranchFilePath (BranchFilePath b f) = - encodeBS' (fromRef b) <> ":" <> getTopFilePath f + fromRef b <> ":" <> getTopFilePath f {- Path to a TopFilePath, within the provided git repo. -} fromTopFilePath :: TopFilePath -> Git.Repo -> RawFilePath diff --git a/Git/Fsck.hs b/Git/Fsck.hs index 6f33e11991..69a9e9f81e 100644 --- a/Git/Fsck.hs +++ b/Git/Fsck.hs @@ -139,7 +139,8 @@ isMissing s r = either (const True) (const False) <$> tryIO dump ] r findShas :: [String] -> [Sha] -findShas = catMaybes . map extractSha . concat . map words . filter wanted +findShas = catMaybes . map (extractSha . encodeBS') + . concat . map words . filter wanted where wanted l = not ("dangling " `isPrefixOf` l) diff --git a/Git/HashObject.hs b/Git/HashObject.hs index 3787c9cb57..bcad9a1109 100644 --- a/Git/HashObject.hs +++ b/Git/HashObject.hs @@ -18,6 +18,7 @@ import qualified Utility.CoProcess as CoProcess import Utility.Tmp import qualified Data.ByteString as S +import qualified Data.ByteString.Char8 as S8 import qualified Data.ByteString.Lazy as L import Data.ByteString.Builder @@ -39,7 +40,7 @@ hashFile :: HashObjectHandle -> FilePath -> IO Sha hashFile h file = CoProcess.query h send receive where send to = hPutStrLn to =<< absPath file - receive from = getSha "hash-object" $ hGetLine from + receive from = getSha "hash-object" $ S8.hGetLine from class HashableBlob t where hashableBlobToHandle :: Handle -> t -> IO () diff --git a/Git/History.hs b/Git/History.hs index 6706497317..5c202c9b5c 100644 --- a/Git/History.hs +++ b/Git/History.hs @@ -15,6 +15,9 @@ import Git.Command import Git.Sha import qualified Data.Set as S +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as B8 data History t = History t (S.Set (History t)) deriving (Show, Eq, Ord) @@ -53,8 +56,9 @@ getHistoryToDepth n commit r = do !h <- fmap (truncateHistoryToDepth n) . build Nothing . map parsehistorycommit - . lines - <$> hGetContents inh + . map B.copy + . B8.lines + <$> L.hGetContents inh hClose inh void $ waitForProcess pid return h @@ -93,7 +97,7 @@ getHistoryToDepth n commit r = do , Param "--format=%T %H %P" ] - parsehistorycommit l = case map extractSha (splitc ' ' l) of + parsehistorycommit l = case map extractSha (S8.split ' ' l) of (Just t:Just c:ps) -> Just $ ( HistoryCommit { historyCommit = c diff --git a/Git/LsFiles.hs b/Git/LsFiles.hs index 31a74d4283..2291e9c42a 100644 --- a/Git/LsFiles.hs +++ b/Git/LsFiles.hs @@ -284,7 +284,7 @@ parseUnmerged s then Nothing else do treeitemtype <- readTreeItemType (encodeBS rawtreeitemtype) - sha <- extractSha rawsha + sha <- extractSha (encodeBS' rawsha) return $ InternalUnmerged (stage == 2) (toRawFilePath file) (Just treeitemtype) (Just sha) _ -> Nothing diff --git a/Git/LsTree.hs b/Git/LsTree.hs index 5175c39024..ac059bbdff 100644 --- a/Git/LsTree.hs +++ b/Git/LsTree.hs @@ -106,6 +106,6 @@ formatLsTree :: TreeItem -> String formatLsTree ti = unwords [ showOct (mode ti) "" , decodeBS (typeobj ti) - , fromRef (sha ti) + , decodeBS' (fromRef (sha ti)) , fromRawFilePath (getTopFilePath (file ti)) ] diff --git a/Git/Objects.hs b/Git/Objects.hs index c9ede4da9a..6f76886f5f 100644 --- a/Git/Objects.hs +++ b/Git/Objects.hs @@ -26,13 +26,13 @@ listPackFiles r = filter (".pack" `isSuffixOf`) listLooseObjectShas :: Repo -> IO [Sha] listLooseObjectShas r = catchDefaultIO [] $ - mapMaybe (extractSha . concat . reverse . take 2 . reverse . splitDirectories) + mapMaybe (extractSha . encodeBS . concat . reverse . take 2 . reverse . splitDirectories) <$> dirContentsRecursiveSkipping (== "pack") True (objectsDir r) looseObjectFile :: Repo -> Sha -> FilePath looseObjectFile r sha = objectsDir r prefix rest where - (prefix, rest) = splitAt 2 (fromRef sha) + (prefix, rest) = splitAt 2 (decodeBS' (fromRef sha)) listAlternates :: Repo -> IO [FilePath] listAlternates r = catchDefaultIO [] (lines <$> readFile alternatesfile) diff --git a/Git/Ref.hs b/Git/Ref.hs index 621e328f27..433f423b9c 100644 --- a/Git/Ref.hs +++ b/Git/Ref.hs @@ -145,8 +145,8 @@ delete :: Sha -> Ref -> Repo -> IO () delete oldvalue ref = run [ Param "update-ref" , Param "-d" - , Param $ fromRef ref - , Param $ fromRef oldvalue + , Param $ decodeBS' (fromRef ref) + , Param $ decodeBS' (fromRef oldvalue) ] {- Gets the sha of the tree a ref uses. @@ -154,13 +154,17 @@ delete oldvalue ref = run - The ref may be something like a branch name, and it could contain - ":subdir" if a subtree is wanted. -} tree :: Ref -> Repo -> IO (Maybe Sha) -tree (Ref ref) = extractSha . decodeBS <$$> pipeReadStrict - [ Param "rev-parse", Param "--verify", Param "--quiet", Param ref' ] +tree (Ref ref) = extractSha <$$> pipeReadStrict + [ Param "rev-parse" + , Param "--verify" + , Param "--quiet" + , Param (decodeBS' ref') + ] where - ref' = if ":" `isInfixOf` ref + ref' = if ":" `S.isInfixOf` ref then ref -- de-reference commit objects to the tree - else ref ++ ":" + else ref <> ":" {- Checks if a String is a legal git ref name. - diff --git a/Git/RefLog.hs b/Git/RefLog.hs index 7ba8713af7..e8fe6a2217 100644 --- a/Git/RefLog.hs +++ b/Git/RefLog.hs @@ -12,16 +12,19 @@ import Git import Git.Command import Git.Sha +import qualified Data.ByteString as S +import qualified Data.ByteString.Char8 as S8 + {- Gets the reflog for a given branch. -} get :: Branch -> Repo -> IO [Sha] get b = getMulti [b] {- Gets reflogs for multiple branches. -} getMulti :: [Branch] -> Repo -> IO [Sha] -getMulti bs = get' (map (Param . fromRef) bs) +getMulti bs = get' (map (Param . decodeBS' . fromRef) bs) get' :: [CommandParam] -> Repo -> IO [Sha] -get' ps = mapMaybe extractSha . lines . decodeBS <$$> pipeReadStrict ps' +get' ps = mapMaybe (extractSha . S.copy) . S8.lines <$$> pipeReadStrict ps' where ps' = catMaybes [ Just $ Param "log" diff --git a/Git/Sha.hs b/Git/Sha.hs index 24fe546192..a66c34ee2c 100644 --- a/Git/Sha.hs +++ b/Git/Sha.hs @@ -5,31 +5,43 @@ - Licensed under the GNU AGPL version 3 or higher. -} +{-# LANGUAGE OverloadedStrings #-} + module Git.Sha where import Common import Git.Types +import qualified Data.ByteString as S +import Data.Char + {- Runs an action that causes a git subcommand to emit a Sha, and strips - any trailing newline, returning the sha. -} -getSha :: String -> IO String -> IO Sha +getSha :: String -> IO S.ByteString -> IO Sha getSha subcommand a = maybe bad return =<< extractSha <$> a where bad = error $ "failed to read sha from git " ++ subcommand -{- Extracts the Sha from a string. There can be a trailing newline after - - it, but nothing else. -} -extractSha :: String -> Maybe Sha +{- Extracts the Sha from a ByteString. + - + - There can be a trailing newline after it, but nothing else. + -} +extractSha :: S.ByteString -> Maybe Sha extractSha s | len `elem` shaSizes = val s - | len - 1 `elem` shaSizes && length s' == len - 1 = val s' + | len - 1 `elem` shaSizes && S.length s' == len - 1 = val s' | otherwise = Nothing where - len = length s - s' = firstLine s + len = S.length s + s' = firstLine' s val v - | all (`elem` "1234567890ABCDEFabcdef") v = Just $ Ref v + | S.all validinsha v = Just $ Ref v | otherwise = Nothing + validinsha w = or + [ w >= 48 && w <= 57 -- 0-9 + , w >= 97 && w <= 102 -- a-f + , w >= 65 && w <= 70 -- A-F + ] {- Sizes of git shas. -} shaSizes :: [Int] @@ -41,7 +53,9 @@ shaSizes = {- Git plumbing often uses a all 0 sha to represent things like a - deleted file. -} nullShas :: [Sha] -nullShas = map (\n -> Ref (replicate n '0')) shaSizes +nullShas = map (\n -> Ref (S.replicate n zero)) shaSizes + where + zero = fromIntegral (ord '0') {- Sha to provide to git plumbing when deleting a file. - diff --git a/Git/Types.hs b/Git/Types.hs index 9c2754a7d3..84238c7c4e 100644 --- a/Git/Types.hs +++ b/Git/Types.hs @@ -81,10 +81,10 @@ instance IsString ConfigValue where type RemoteName = String {- A git ref. Can be a sha1, or a branch or tag name. -} -newtype Ref = Ref String +newtype Ref = Ref S.ByteString deriving (Eq, Ord, Read, Show) -fromRef :: Ref -> String +fromRef :: Ref -> S.ByteString fromRef (Ref s) = s {- Aliases for Ref. -} diff --git a/Git/UpdateIndex.hs b/Git/UpdateIndex.hs index 68dc8b7097..59d08437de 100644 --- a/Git/UpdateIndex.hs +++ b/Git/UpdateIndex.hs @@ -75,14 +75,14 @@ lsTree (Ref x) repo streamer = do mapM_ streamer s void $ cleanup where - params = map Param ["ls-tree", "-z", "-r", "--full-tree", x] + params = map Param ["ls-tree", "-z", "-r", "--full-tree", decodeBS' x] lsSubTree :: Ref -> FilePath -> Repo -> Streamer lsSubTree (Ref x) p repo streamer = do (s, cleanup) <- pipeNullSplit params repo mapM_ streamer s void $ cleanup where - params = map Param ["ls-tree", "-z", "-r", "--full-tree", x, p] + params = map Param ["ls-tree", "-z", "-r", "--full-tree", decodeBS' x, p] {- Generates a line suitable to be fed into update-index, to add - a given file with a given sha. -} @@ -90,7 +90,7 @@ updateIndexLine :: Sha -> TreeItemType -> TopFilePath -> L.ByteString updateIndexLine sha treeitemtype file = L.fromStrict $ fmtTreeItemType treeitemtype <> " blob " - <> encodeBS (fromRef sha) + <> fromRef sha <> "\t" <> indexPath file @@ -108,7 +108,7 @@ unstageFile file repo = do unstageFile' :: TopFilePath -> Streamer unstageFile' p = pureStreamer $ L.fromStrict $ "0 " - <> encodeBS' (fromRef deleteSha) + <> fromRef deleteSha <> "\t" <> indexPath p diff --git a/Types/RefSpec.hs b/Types/RefSpec.hs index 8479a69a6b..1028ed5233 100644 --- a/Types/RefSpec.hs +++ b/Types/RefSpec.hs @@ -32,7 +32,7 @@ parseRefSpec v = case partitionEithers (map mk $ splitc ':' v) of mk ('+':s) | any (`elem` s) "*?" = Right $ AddMatching $ compileGlob s CaseSensative - | otherwise = Right $ AddRef $ Ref s + | otherwise = Right $ AddRef $ Ref $ encodeBS s mk ('-':s) = Right $ RemoveMatching $ compileGlob s CaseSensative mk "reflog" = Right AddRefLog mk s = Left $ "bad refspec item \"" ++ s ++ "\" (expected + or - prefix)" @@ -43,10 +43,10 @@ applyRefSpec refspec rs getreflog = go [] refspec go c [] = return (reverse c) go c (AddRef r : rest) = go (r:c) rest go c (AddMatching g : rest) = - let add = filter (matchGlob g . fromRef) rs + let add = filter (matchGlob g . decodeBS' . fromRef) rs in go (add ++ c) rest go c (AddRefLog : rest) = do reflog <- getreflog go (reflog ++ c) rest go c (RemoveMatching g : rest) = - go (filter (not . matchGlob g . fromRef) c) rest + go (filter (not . matchGlob g . decodeBS' . fromRef) c) rest diff --git a/Utility/Process.hs b/Utility/Process.hs index af3a5f4f62..e7142b9ecb 100644 --- a/Utility/Process.hs +++ b/Utility/Process.hs @@ -1,7 +1,7 @@ {- System.Process enhancements, including additional ways of running - processes, and logging. - - - Copyright 2012-2015 Joey Hess + - Copyright 2012-2020 Joey Hess - - License: BSD-2-clause -} @@ -53,6 +53,7 @@ import System.Log.Logger import Control.Concurrent import qualified Control.Exception as E import Control.Monad +import qualified Data.ByteString as S type CreateProcessRunner = forall a. CreateProcess -> ((Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle) -> IO a) -> IO a @@ -85,25 +86,20 @@ writeReadProcessEnv -> [String] -> Maybe [(String, String)] -> (Maybe (Handle -> IO ())) - -> (Maybe (Handle -> IO ())) - -> IO String -writeReadProcessEnv cmd args environ writestdin adjusthandle = do + -> IO S.ByteString +writeReadProcessEnv cmd args environ writestdin = do (Just inh, Just outh, _, pid) <- createProcess p - maybe (return ()) (\a -> a inh) adjusthandle - maybe (return ()) (\a -> a outh) adjusthandle - -- fork off a thread to start consuming the output - output <- hGetContents outh outMVar <- newEmptyMVar - _ <- forkIO $ E.evaluate (length output) >> putMVar outMVar () + _ <- forkIO $ putMVar outMVar =<< S.hGetContents outh -- now write and flush any input maybe (return ()) (\a -> a inh >> hFlush inh) writestdin hClose inh -- done with stdin -- wait on the output - takeMVar outMVar + output <- takeMVar outMVar hClose outh -- wait on the process