started converting Ref from String to ByteString

This should make code that reads shas and refs from git faster.

Does not compile yet, a lot needs to be done still.
This commit is contained in:
Joey Hess 2020-04-06 17:14:49 -04:00
parent 6e9714612b
commit 279991604d
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
17 changed files with 80 additions and 51 deletions

View file

@ -28,6 +28,7 @@ import Foreign.C.Types
import Key import Key
import Utility.InodeCache import Utility.InodeCache
import Utility.FileSize import Utility.FileSize
import Utility.FileSystemEncoding
import Git.Types import Git.Types
import Types.UUID import Types.UUID
import Types.Import import Types.Import
@ -94,10 +95,10 @@ newtype SSha = SSha String
deriving (Eq, Show) deriving (Eq, Show)
toSSha :: Sha -> SSha toSSha :: Sha -> SSha
toSSha (Ref s) = SSha s toSSha (Ref s) = SSha (decodeBS' s)
fromSSha :: SSha -> Ref fromSSha :: SSha -> Ref
fromSSha (SSha s) = Ref s fromSSha (SSha s) = Ref (encodeBS' s)
instance PersistField SSha where instance PersistField SSha where
toPersistValue (SSha b) = toPersistValue b toPersistValue (SSha b) = toPersistValue b

View file

@ -81,11 +81,16 @@ pipeReadStrict' reader params repo = assertLocal repo $
{- Runs a git command, feeding it an input, and returning its output, {- Runs a git command, feeding it an input, and returning its output,
- which is expected to be fairly small, since it's all read into memory - which is expected to be fairly small, since it's all read into memory
- strictly. -} - strictly. -}
pipeWriteRead :: [CommandParam] -> Maybe (Handle -> IO ()) -> Repo -> IO String pipeWriteRead :: [CommandParam] -> Maybe (Handle -> IO ()) -> Repo -> IO S.ByteString
pipeWriteRead params writer repo = assertLocal repo $ pipeWriteRead params writer repo = assertLocal repo $
writeReadProcessEnv "git" (toCommand $ gitCommandLine params repo) writeReadProcessEnv "git" (toCommand $ gitCommandLine params repo)
(gitEnv repo) writer (Just adjusthandle) (gitEnv repo) writer'
where where
writer' = case writer of
Nothing -> Nothing
Just a -> Just $ \h -> do
adjusthandle h
a h
adjusthandle h = hSetNewlineMode h noNewlineTranslation adjusthandle h = hSetNewlineMode h noNewlineTranslation
{- Runs a git command, feeding it input on a handle with an action. -} {- Runs a git command, feeding it input on a handle with an action. -}

View file

@ -58,7 +58,7 @@ urlCredential = Credential . M.singleton "url"
runCredential :: String -> Credential -> Repo -> IO Credential runCredential :: String -> Credential -> Repo -> IO Credential
runCredential action input r = runCredential action input r =
parseCredential <$> pipeWriteRead parseCredential . decodeBS <$> pipeWriteRead
[ Param "credential" [ Param "credential"
, Param action , Param action
] ]

View file

@ -50,7 +50,7 @@ data BranchFilePath = BranchFilePath Ref TopFilePath
{- Git uses the branch:file form to refer to a BranchFilePath -} {- Git uses the branch:file form to refer to a BranchFilePath -}
descBranchFilePath :: BranchFilePath -> S.ByteString descBranchFilePath :: BranchFilePath -> S.ByteString
descBranchFilePath (BranchFilePath b f) = descBranchFilePath (BranchFilePath b f) =
encodeBS' (fromRef b) <> ":" <> getTopFilePath f fromRef b <> ":" <> getTopFilePath f
{- Path to a TopFilePath, within the provided git repo. -} {- Path to a TopFilePath, within the provided git repo. -}
fromTopFilePath :: TopFilePath -> Git.Repo -> RawFilePath fromTopFilePath :: TopFilePath -> Git.Repo -> RawFilePath

View file

@ -139,7 +139,8 @@ isMissing s r = either (const True) (const False) <$> tryIO dump
] r ] r
findShas :: [String] -> [Sha] findShas :: [String] -> [Sha]
findShas = catMaybes . map extractSha . concat . map words . filter wanted findShas = catMaybes . map (extractSha . encodeBS')
. concat . map words . filter wanted
where where
wanted l = not ("dangling " `isPrefixOf` l) wanted l = not ("dangling " `isPrefixOf` l)

View file

@ -18,6 +18,7 @@ import qualified Utility.CoProcess as CoProcess
import Utility.Tmp import Utility.Tmp
import qualified Data.ByteString as S import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import Data.ByteString.Builder import Data.ByteString.Builder
@ -39,7 +40,7 @@ hashFile :: HashObjectHandle -> FilePath -> IO Sha
hashFile h file = CoProcess.query h send receive hashFile h file = CoProcess.query h send receive
where where
send to = hPutStrLn to =<< absPath file send to = hPutStrLn to =<< absPath file
receive from = getSha "hash-object" $ hGetLine from receive from = getSha "hash-object" $ S8.hGetLine from
class HashableBlob t where class HashableBlob t where
hashableBlobToHandle :: Handle -> t -> IO () hashableBlobToHandle :: Handle -> t -> IO ()

View file

@ -15,6 +15,9 @@ import Git.Command
import Git.Sha import Git.Sha
import qualified Data.Set as S import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
data History t = History t (S.Set (History t)) data History t = History t (S.Set (History t))
deriving (Show, Eq, Ord) deriving (Show, Eq, Ord)
@ -53,8 +56,9 @@ getHistoryToDepth n commit r = do
!h <- fmap (truncateHistoryToDepth n) !h <- fmap (truncateHistoryToDepth n)
. build Nothing . build Nothing
. map parsehistorycommit . map parsehistorycommit
. lines . map B.copy
<$> hGetContents inh . B8.lines
<$> L.hGetContents inh
hClose inh hClose inh
void $ waitForProcess pid void $ waitForProcess pid
return h return h
@ -93,7 +97,7 @@ getHistoryToDepth n commit r = do
, Param "--format=%T %H %P" , Param "--format=%T %H %P"
] ]
parsehistorycommit l = case map extractSha (splitc ' ' l) of parsehistorycommit l = case map extractSha (S8.split ' ' l) of
(Just t:Just c:ps) -> Just $ (Just t:Just c:ps) -> Just $
( HistoryCommit ( HistoryCommit
{ historyCommit = c { historyCommit = c

View file

@ -284,7 +284,7 @@ parseUnmerged s
then Nothing then Nothing
else do else do
treeitemtype <- readTreeItemType (encodeBS rawtreeitemtype) treeitemtype <- readTreeItemType (encodeBS rawtreeitemtype)
sha <- extractSha rawsha sha <- extractSha (encodeBS' rawsha)
return $ InternalUnmerged (stage == 2) (toRawFilePath file) return $ InternalUnmerged (stage == 2) (toRawFilePath file)
(Just treeitemtype) (Just sha) (Just treeitemtype) (Just sha)
_ -> Nothing _ -> Nothing

View file

@ -106,6 +106,6 @@ formatLsTree :: TreeItem -> String
formatLsTree ti = unwords formatLsTree ti = unwords
[ showOct (mode ti) "" [ showOct (mode ti) ""
, decodeBS (typeobj ti) , decodeBS (typeobj ti)
, fromRef (sha ti) , decodeBS' (fromRef (sha ti))
, fromRawFilePath (getTopFilePath (file ti)) , fromRawFilePath (getTopFilePath (file ti))
] ]

View file

@ -26,13 +26,13 @@ listPackFiles r = filter (".pack" `isSuffixOf`)
listLooseObjectShas :: Repo -> IO [Sha] listLooseObjectShas :: Repo -> IO [Sha]
listLooseObjectShas r = catchDefaultIO [] $ listLooseObjectShas r = catchDefaultIO [] $
mapMaybe (extractSha . concat . reverse . take 2 . reverse . splitDirectories) mapMaybe (extractSha . encodeBS . concat . reverse . take 2 . reverse . splitDirectories)
<$> dirContentsRecursiveSkipping (== "pack") True (objectsDir r) <$> dirContentsRecursiveSkipping (== "pack") True (objectsDir r)
looseObjectFile :: Repo -> Sha -> FilePath looseObjectFile :: Repo -> Sha -> FilePath
looseObjectFile r sha = objectsDir r </> prefix </> rest looseObjectFile r sha = objectsDir r </> prefix </> rest
where where
(prefix, rest) = splitAt 2 (fromRef sha) (prefix, rest) = splitAt 2 (decodeBS' (fromRef sha))
listAlternates :: Repo -> IO [FilePath] listAlternates :: Repo -> IO [FilePath]
listAlternates r = catchDefaultIO [] (lines <$> readFile alternatesfile) listAlternates r = catchDefaultIO [] (lines <$> readFile alternatesfile)

View file

@ -145,8 +145,8 @@ delete :: Sha -> Ref -> Repo -> IO ()
delete oldvalue ref = run delete oldvalue ref = run
[ Param "update-ref" [ Param "update-ref"
, Param "-d" , Param "-d"
, Param $ fromRef ref , Param $ decodeBS' (fromRef ref)
, Param $ fromRef oldvalue , Param $ decodeBS' (fromRef oldvalue)
] ]
{- Gets the sha of the tree a ref uses. {- Gets the sha of the tree a ref uses.
@ -154,13 +154,17 @@ delete oldvalue ref = run
- The ref may be something like a branch name, and it could contain - The ref may be something like a branch name, and it could contain
- ":subdir" if a subtree is wanted. -} - ":subdir" if a subtree is wanted. -}
tree :: Ref -> Repo -> IO (Maybe Sha) tree :: Ref -> Repo -> IO (Maybe Sha)
tree (Ref ref) = extractSha . decodeBS <$$> pipeReadStrict tree (Ref ref) = extractSha <$$> pipeReadStrict
[ Param "rev-parse", Param "--verify", Param "--quiet", Param ref' ] [ Param "rev-parse"
, Param "--verify"
, Param "--quiet"
, Param (decodeBS' ref')
]
where where
ref' = if ":" `isInfixOf` ref ref' = if ":" `S.isInfixOf` ref
then ref then ref
-- de-reference commit objects to the tree -- de-reference commit objects to the tree
else ref ++ ":" else ref <> ":"
{- Checks if a String is a legal git ref name. {- Checks if a String is a legal git ref name.
- -

View file

@ -12,16 +12,19 @@ import Git
import Git.Command import Git.Command
import Git.Sha import Git.Sha
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
{- Gets the reflog for a given branch. -} {- Gets the reflog for a given branch. -}
get :: Branch -> Repo -> IO [Sha] get :: Branch -> Repo -> IO [Sha]
get b = getMulti [b] get b = getMulti [b]
{- Gets reflogs for multiple branches. -} {- Gets reflogs for multiple branches. -}
getMulti :: [Branch] -> Repo -> IO [Sha] getMulti :: [Branch] -> Repo -> IO [Sha]
getMulti bs = get' (map (Param . fromRef) bs) getMulti bs = get' (map (Param . decodeBS' . fromRef) bs)
get' :: [CommandParam] -> Repo -> IO [Sha] get' :: [CommandParam] -> Repo -> IO [Sha]
get' ps = mapMaybe extractSha . lines . decodeBS <$$> pipeReadStrict ps' get' ps = mapMaybe (extractSha . S.copy) . S8.lines <$$> pipeReadStrict ps'
where where
ps' = catMaybes ps' = catMaybes
[ Just $ Param "log" [ Just $ Param "log"

View file

@ -5,31 +5,43 @@
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
{-# LANGUAGE OverloadedStrings #-}
module Git.Sha where module Git.Sha where
import Common import Common
import Git.Types import Git.Types
import qualified Data.ByteString as S
import Data.Char
{- Runs an action that causes a git subcommand to emit a Sha, and strips {- Runs an action that causes a git subcommand to emit a Sha, and strips
- any trailing newline, returning the sha. -} - any trailing newline, returning the sha. -}
getSha :: String -> IO String -> IO Sha getSha :: String -> IO S.ByteString -> IO Sha
getSha subcommand a = maybe bad return =<< extractSha <$> a getSha subcommand a = maybe bad return =<< extractSha <$> a
where where
bad = error $ "failed to read sha from git " ++ subcommand bad = error $ "failed to read sha from git " ++ subcommand
{- Extracts the Sha from a string. There can be a trailing newline after {- Extracts the Sha from a ByteString.
- it, but nothing else. -} -
extractSha :: String -> Maybe Sha - There can be a trailing newline after it, but nothing else.
-}
extractSha :: S.ByteString -> Maybe Sha
extractSha s extractSha s
| len `elem` shaSizes = val s | len `elem` shaSizes = val s
| len - 1 `elem` shaSizes && length s' == len - 1 = val s' | len - 1 `elem` shaSizes && S.length s' == len - 1 = val s'
| otherwise = Nothing | otherwise = Nothing
where where
len = length s len = S.length s
s' = firstLine s s' = firstLine' s
val v val v
| all (`elem` "1234567890ABCDEFabcdef") v = Just $ Ref v | S.all validinsha v = Just $ Ref v
| otherwise = Nothing | otherwise = Nothing
validinsha w = or
[ w >= 48 && w <= 57 -- 0-9
, w >= 97 && w <= 102 -- a-f
, w >= 65 && w <= 70 -- A-F
]
{- Sizes of git shas. -} {- Sizes of git shas. -}
shaSizes :: [Int] shaSizes :: [Int]
@ -41,7 +53,9 @@ shaSizes =
{- Git plumbing often uses a all 0 sha to represent things like a {- Git plumbing often uses a all 0 sha to represent things like a
- deleted file. -} - deleted file. -}
nullShas :: [Sha] nullShas :: [Sha]
nullShas = map (\n -> Ref (replicate n '0')) shaSizes nullShas = map (\n -> Ref (S.replicate n zero)) shaSizes
where
zero = fromIntegral (ord '0')
{- Sha to provide to git plumbing when deleting a file. {- Sha to provide to git plumbing when deleting a file.
- -

View file

@ -81,10 +81,10 @@ instance IsString ConfigValue where
type RemoteName = String type RemoteName = String
{- A git ref. Can be a sha1, or a branch or tag name. -} {- A git ref. Can be a sha1, or a branch or tag name. -}
newtype Ref = Ref String newtype Ref = Ref S.ByteString
deriving (Eq, Ord, Read, Show) deriving (Eq, Ord, Read, Show)
fromRef :: Ref -> String fromRef :: Ref -> S.ByteString
fromRef (Ref s) = s fromRef (Ref s) = s
{- Aliases for Ref. -} {- Aliases for Ref. -}

View file

@ -75,14 +75,14 @@ lsTree (Ref x) repo streamer = do
mapM_ streamer s mapM_ streamer s
void $ cleanup void $ cleanup
where where
params = map Param ["ls-tree", "-z", "-r", "--full-tree", x] params = map Param ["ls-tree", "-z", "-r", "--full-tree", decodeBS' x]
lsSubTree :: Ref -> FilePath -> Repo -> Streamer lsSubTree :: Ref -> FilePath -> Repo -> Streamer
lsSubTree (Ref x) p repo streamer = do lsSubTree (Ref x) p repo streamer = do
(s, cleanup) <- pipeNullSplit params repo (s, cleanup) <- pipeNullSplit params repo
mapM_ streamer s mapM_ streamer s
void $ cleanup void $ cleanup
where where
params = map Param ["ls-tree", "-z", "-r", "--full-tree", x, p] params = map Param ["ls-tree", "-z", "-r", "--full-tree", decodeBS' x, p]
{- Generates a line suitable to be fed into update-index, to add {- Generates a line suitable to be fed into update-index, to add
- a given file with a given sha. -} - a given file with a given sha. -}
@ -90,7 +90,7 @@ updateIndexLine :: Sha -> TreeItemType -> TopFilePath -> L.ByteString
updateIndexLine sha treeitemtype file = L.fromStrict $ updateIndexLine sha treeitemtype file = L.fromStrict $
fmtTreeItemType treeitemtype fmtTreeItemType treeitemtype
<> " blob " <> " blob "
<> encodeBS (fromRef sha) <> fromRef sha
<> "\t" <> "\t"
<> indexPath file <> indexPath file
@ -108,7 +108,7 @@ unstageFile file repo = do
unstageFile' :: TopFilePath -> Streamer unstageFile' :: TopFilePath -> Streamer
unstageFile' p = pureStreamer $ L.fromStrict $ unstageFile' p = pureStreamer $ L.fromStrict $
"0 " "0 "
<> encodeBS' (fromRef deleteSha) <> fromRef deleteSha
<> "\t" <> "\t"
<> indexPath p <> indexPath p

View file

@ -32,7 +32,7 @@ parseRefSpec v = case partitionEithers (map mk $ splitc ':' v) of
mk ('+':s) mk ('+':s)
| any (`elem` s) "*?" = | any (`elem` s) "*?" =
Right $ AddMatching $ compileGlob s CaseSensative Right $ AddMatching $ compileGlob s CaseSensative
| otherwise = Right $ AddRef $ Ref s | otherwise = Right $ AddRef $ Ref $ encodeBS s
mk ('-':s) = Right $ RemoveMatching $ compileGlob s CaseSensative mk ('-':s) = Right $ RemoveMatching $ compileGlob s CaseSensative
mk "reflog" = Right AddRefLog mk "reflog" = Right AddRefLog
mk s = Left $ "bad refspec item \"" ++ s ++ "\" (expected + or - prefix)" mk s = Left $ "bad refspec item \"" ++ s ++ "\" (expected + or - prefix)"
@ -43,10 +43,10 @@ applyRefSpec refspec rs getreflog = go [] refspec
go c [] = return (reverse c) go c [] = return (reverse c)
go c (AddRef r : rest) = go (r:c) rest go c (AddRef r : rest) = go (r:c) rest
go c (AddMatching g : rest) = go c (AddMatching g : rest) =
let add = filter (matchGlob g . fromRef) rs let add = filter (matchGlob g . decodeBS' . fromRef) rs
in go (add ++ c) rest in go (add ++ c) rest
go c (AddRefLog : rest) = do go c (AddRefLog : rest) = do
reflog <- getreflog reflog <- getreflog
go (reflog ++ c) rest go (reflog ++ c) rest
go c (RemoveMatching g : rest) = go c (RemoveMatching g : rest) =
go (filter (not . matchGlob g . fromRef) c) rest go (filter (not . matchGlob g . decodeBS' . fromRef) c) rest

View file

@ -1,7 +1,7 @@
{- System.Process enhancements, including additional ways of running {- System.Process enhancements, including additional ways of running
- processes, and logging. - processes, and logging.
- -
- Copyright 2012-2015 Joey Hess <id@joeyh.name> - Copyright 2012-2020 Joey Hess <id@joeyh.name>
- -
- License: BSD-2-clause - License: BSD-2-clause
-} -}
@ -53,6 +53,7 @@ import System.Log.Logger
import Control.Concurrent import Control.Concurrent
import qualified Control.Exception as E import qualified Control.Exception as E
import Control.Monad import Control.Monad
import qualified Data.ByteString as S
type CreateProcessRunner = forall a. CreateProcess -> ((Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle) -> IO a) -> IO a type CreateProcessRunner = forall a. CreateProcess -> ((Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle) -> IO a) -> IO a
@ -85,25 +86,20 @@ writeReadProcessEnv
-> [String] -> [String]
-> Maybe [(String, String)] -> Maybe [(String, String)]
-> (Maybe (Handle -> IO ())) -> (Maybe (Handle -> IO ()))
-> (Maybe (Handle -> IO ())) -> IO S.ByteString
-> IO String writeReadProcessEnv cmd args environ writestdin = do
writeReadProcessEnv cmd args environ writestdin adjusthandle = do
(Just inh, Just outh, _, pid) <- createProcess p (Just inh, Just outh, _, pid) <- createProcess p
maybe (return ()) (\a -> a inh) adjusthandle
maybe (return ()) (\a -> a outh) adjusthandle
-- fork off a thread to start consuming the output -- fork off a thread to start consuming the output
output <- hGetContents outh
outMVar <- newEmptyMVar outMVar <- newEmptyMVar
_ <- forkIO $ E.evaluate (length output) >> putMVar outMVar () _ <- forkIO $ putMVar outMVar =<< S.hGetContents outh
-- now write and flush any input -- now write and flush any input
maybe (return ()) (\a -> a inh >> hFlush inh) writestdin maybe (return ()) (\a -> a inh >> hFlush inh) writestdin
hClose inh -- done with stdin hClose inh -- done with stdin
-- wait on the output -- wait on the output
takeMVar outMVar output <- takeMVar outMVar
hClose outh hClose outh
-- wait on the process -- wait on the process