started converting Ref from String to ByteString

This should make code that reads shas and refs from git faster.

Does not compile yet, a lot needs to be done still.
This commit is contained in:
Joey Hess 2020-04-06 17:14:49 -04:00
parent 6e9714612b
commit 279991604d
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
17 changed files with 80 additions and 51 deletions

View file

@ -28,6 +28,7 @@ import Foreign.C.Types
import Key
import Utility.InodeCache
import Utility.FileSize
import Utility.FileSystemEncoding
import Git.Types
import Types.UUID
import Types.Import
@ -94,10 +95,10 @@ newtype SSha = SSha String
deriving (Eq, Show)
toSSha :: Sha -> SSha
toSSha (Ref s) = SSha s
toSSha (Ref s) = SSha (decodeBS' s)
fromSSha :: SSha -> Ref
fromSSha (SSha s) = Ref s
fromSSha (SSha s) = Ref (encodeBS' s)
instance PersistField SSha where
toPersistValue (SSha b) = toPersistValue b

View file

@ -81,11 +81,16 @@ pipeReadStrict' reader params repo = assertLocal repo $
{- Runs a git command, feeding it an input, and returning its output,
- which is expected to be fairly small, since it's all read into memory
- strictly. -}
pipeWriteRead :: [CommandParam] -> Maybe (Handle -> IO ()) -> Repo -> IO String
pipeWriteRead :: [CommandParam] -> Maybe (Handle -> IO ()) -> Repo -> IO S.ByteString
pipeWriteRead params writer repo = assertLocal repo $
writeReadProcessEnv "git" (toCommand $ gitCommandLine params repo)
(gitEnv repo) writer (Just adjusthandle)
(gitEnv repo) writer'
where
writer' = case writer of
Nothing -> Nothing
Just a -> Just $ \h -> do
adjusthandle h
a h
adjusthandle h = hSetNewlineMode h noNewlineTranslation
{- Runs a git command, feeding it input on a handle with an action. -}

View file

@ -58,7 +58,7 @@ urlCredential = Credential . M.singleton "url"
runCredential :: String -> Credential -> Repo -> IO Credential
runCredential action input r =
parseCredential <$> pipeWriteRead
parseCredential . decodeBS <$> pipeWriteRead
[ Param "credential"
, Param action
]

View file

@ -50,7 +50,7 @@ data BranchFilePath = BranchFilePath Ref TopFilePath
{- Git uses the branch:file form to refer to a BranchFilePath -}
descBranchFilePath :: BranchFilePath -> S.ByteString
descBranchFilePath (BranchFilePath b f) =
encodeBS' (fromRef b) <> ":" <> getTopFilePath f
fromRef b <> ":" <> getTopFilePath f
{- Path to a TopFilePath, within the provided git repo. -}
fromTopFilePath :: TopFilePath -> Git.Repo -> RawFilePath

View file

@ -139,7 +139,8 @@ isMissing s r = either (const True) (const False) <$> tryIO dump
] r
findShas :: [String] -> [Sha]
findShas = catMaybes . map extractSha . concat . map words . filter wanted
findShas = catMaybes . map (extractSha . encodeBS')
. concat . map words . filter wanted
where
wanted l = not ("dangling " `isPrefixOf` l)

View file

@ -18,6 +18,7 @@ import qualified Utility.CoProcess as CoProcess
import Utility.Tmp
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L
import Data.ByteString.Builder
@ -39,7 +40,7 @@ hashFile :: HashObjectHandle -> FilePath -> IO Sha
hashFile h file = CoProcess.query h send receive
where
send to = hPutStrLn to =<< absPath file
receive from = getSha "hash-object" $ hGetLine from
receive from = getSha "hash-object" $ S8.hGetLine from
class HashableBlob t where
hashableBlobToHandle :: Handle -> t -> IO ()

View file

@ -15,6 +15,9 @@ import Git.Command
import Git.Sha
import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
data History t = History t (S.Set (History t))
deriving (Show, Eq, Ord)
@ -53,8 +56,9 @@ getHistoryToDepth n commit r = do
!h <- fmap (truncateHistoryToDepth n)
. build Nothing
. map parsehistorycommit
. lines
<$> hGetContents inh
. map B.copy
. B8.lines
<$> L.hGetContents inh
hClose inh
void $ waitForProcess pid
return h
@ -93,7 +97,7 @@ getHistoryToDepth n commit r = do
, Param "--format=%T %H %P"
]
parsehistorycommit l = case map extractSha (splitc ' ' l) of
parsehistorycommit l = case map extractSha (S8.split ' ' l) of
(Just t:Just c:ps) -> Just $
( HistoryCommit
{ historyCommit = c

View file

@ -284,7 +284,7 @@ parseUnmerged s
then Nothing
else do
treeitemtype <- readTreeItemType (encodeBS rawtreeitemtype)
sha <- extractSha rawsha
sha <- extractSha (encodeBS' rawsha)
return $ InternalUnmerged (stage == 2) (toRawFilePath file)
(Just treeitemtype) (Just sha)
_ -> Nothing

View file

@ -106,6 +106,6 @@ formatLsTree :: TreeItem -> String
formatLsTree ti = unwords
[ showOct (mode ti) ""
, decodeBS (typeobj ti)
, fromRef (sha ti)
, decodeBS' (fromRef (sha ti))
, fromRawFilePath (getTopFilePath (file ti))
]

View file

@ -26,13 +26,13 @@ listPackFiles r = filter (".pack" `isSuffixOf`)
listLooseObjectShas :: Repo -> IO [Sha]
listLooseObjectShas r = catchDefaultIO [] $
mapMaybe (extractSha . concat . reverse . take 2 . reverse . splitDirectories)
mapMaybe (extractSha . encodeBS . concat . reverse . take 2 . reverse . splitDirectories)
<$> dirContentsRecursiveSkipping (== "pack") True (objectsDir r)
looseObjectFile :: Repo -> Sha -> FilePath
looseObjectFile r sha = objectsDir r </> prefix </> rest
where
(prefix, rest) = splitAt 2 (fromRef sha)
(prefix, rest) = splitAt 2 (decodeBS' (fromRef sha))
listAlternates :: Repo -> IO [FilePath]
listAlternates r = catchDefaultIO [] (lines <$> readFile alternatesfile)

View file

@ -145,8 +145,8 @@ delete :: Sha -> Ref -> Repo -> IO ()
delete oldvalue ref = run
[ Param "update-ref"
, Param "-d"
, Param $ fromRef ref
, Param $ fromRef oldvalue
, Param $ decodeBS' (fromRef ref)
, Param $ decodeBS' (fromRef oldvalue)
]
{- Gets the sha of the tree a ref uses.
@ -154,13 +154,17 @@ delete oldvalue ref = run
- The ref may be something like a branch name, and it could contain
- ":subdir" if a subtree is wanted. -}
tree :: Ref -> Repo -> IO (Maybe Sha)
tree (Ref ref) = extractSha . decodeBS <$$> pipeReadStrict
[ Param "rev-parse", Param "--verify", Param "--quiet", Param ref' ]
tree (Ref ref) = extractSha <$$> pipeReadStrict
[ Param "rev-parse"
, Param "--verify"
, Param "--quiet"
, Param (decodeBS' ref')
]
where
ref' = if ":" `isInfixOf` ref
ref' = if ":" `S.isInfixOf` ref
then ref
-- de-reference commit objects to the tree
else ref ++ ":"
else ref <> ":"
{- Checks if a String is a legal git ref name.
-

View file

@ -12,16 +12,19 @@ import Git
import Git.Command
import Git.Sha
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
{- Gets the reflog for a given branch. -}
get :: Branch -> Repo -> IO [Sha]
get b = getMulti [b]
{- Gets reflogs for multiple branches. -}
getMulti :: [Branch] -> Repo -> IO [Sha]
getMulti bs = get' (map (Param . fromRef) bs)
getMulti bs = get' (map (Param . decodeBS' . fromRef) bs)
get' :: [CommandParam] -> Repo -> IO [Sha]
get' ps = mapMaybe extractSha . lines . decodeBS <$$> pipeReadStrict ps'
get' ps = mapMaybe (extractSha . S.copy) . S8.lines <$$> pipeReadStrict ps'
where
ps' = catMaybes
[ Just $ Param "log"

View file

@ -5,31 +5,43 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Git.Sha where
import Common
import Git.Types
import qualified Data.ByteString as S
import Data.Char
{- Runs an action that causes a git subcommand to emit a Sha, and strips
- any trailing newline, returning the sha. -}
getSha :: String -> IO String -> IO Sha
getSha :: String -> IO S.ByteString -> IO Sha
getSha subcommand a = maybe bad return =<< extractSha <$> a
where
bad = error $ "failed to read sha from git " ++ subcommand
{- Extracts the Sha from a string. There can be a trailing newline after
- it, but nothing else. -}
extractSha :: String -> Maybe Sha
{- Extracts the Sha from a ByteString.
-
- There can be a trailing newline after it, but nothing else.
-}
extractSha :: S.ByteString -> Maybe Sha
extractSha s
| len `elem` shaSizes = val s
| len - 1 `elem` shaSizes && length s' == len - 1 = val s'
| len - 1 `elem` shaSizes && S.length s' == len - 1 = val s'
| otherwise = Nothing
where
len = length s
s' = firstLine s
len = S.length s
s' = firstLine' s
val v
| all (`elem` "1234567890ABCDEFabcdef") v = Just $ Ref v
| S.all validinsha v = Just $ Ref v
| otherwise = Nothing
validinsha w = or
[ w >= 48 && w <= 57 -- 0-9
, w >= 97 && w <= 102 -- a-f
, w >= 65 && w <= 70 -- A-F
]
{- Sizes of git shas. -}
shaSizes :: [Int]
@ -41,7 +53,9 @@ shaSizes =
{- Git plumbing often uses a all 0 sha to represent things like a
- deleted file. -}
nullShas :: [Sha]
nullShas = map (\n -> Ref (replicate n '0')) shaSizes
nullShas = map (\n -> Ref (S.replicate n zero)) shaSizes
where
zero = fromIntegral (ord '0')
{- Sha to provide to git plumbing when deleting a file.
-

View file

@ -81,10 +81,10 @@ instance IsString ConfigValue where
type RemoteName = String
{- A git ref. Can be a sha1, or a branch or tag name. -}
newtype Ref = Ref String
newtype Ref = Ref S.ByteString
deriving (Eq, Ord, Read, Show)
fromRef :: Ref -> String
fromRef :: Ref -> S.ByteString
fromRef (Ref s) = s
{- Aliases for Ref. -}

View file

@ -75,14 +75,14 @@ lsTree (Ref x) repo streamer = do
mapM_ streamer s
void $ cleanup
where
params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
params = map Param ["ls-tree", "-z", "-r", "--full-tree", decodeBS' x]
lsSubTree :: Ref -> FilePath -> Repo -> Streamer
lsSubTree (Ref x) p repo streamer = do
(s, cleanup) <- pipeNullSplit params repo
mapM_ streamer s
void $ cleanup
where
params = map Param ["ls-tree", "-z", "-r", "--full-tree", x, p]
params = map Param ["ls-tree", "-z", "-r", "--full-tree", decodeBS' x, p]
{- Generates a line suitable to be fed into update-index, to add
- a given file with a given sha. -}
@ -90,7 +90,7 @@ updateIndexLine :: Sha -> TreeItemType -> TopFilePath -> L.ByteString
updateIndexLine sha treeitemtype file = L.fromStrict $
fmtTreeItemType treeitemtype
<> " blob "
<> encodeBS (fromRef sha)
<> fromRef sha
<> "\t"
<> indexPath file
@ -108,7 +108,7 @@ unstageFile file repo = do
unstageFile' :: TopFilePath -> Streamer
unstageFile' p = pureStreamer $ L.fromStrict $
"0 "
<> encodeBS' (fromRef deleteSha)
<> fromRef deleteSha
<> "\t"
<> indexPath p

View file

@ -32,7 +32,7 @@ parseRefSpec v = case partitionEithers (map mk $ splitc ':' v) of
mk ('+':s)
| any (`elem` s) "*?" =
Right $ AddMatching $ compileGlob s CaseSensative
| otherwise = Right $ AddRef $ Ref s
| otherwise = Right $ AddRef $ Ref $ encodeBS s
mk ('-':s) = Right $ RemoveMatching $ compileGlob s CaseSensative
mk "reflog" = Right AddRefLog
mk s = Left $ "bad refspec item \"" ++ s ++ "\" (expected + or - prefix)"
@ -43,10 +43,10 @@ applyRefSpec refspec rs getreflog = go [] refspec
go c [] = return (reverse c)
go c (AddRef r : rest) = go (r:c) rest
go c (AddMatching g : rest) =
let add = filter (matchGlob g . fromRef) rs
let add = filter (matchGlob g . decodeBS' . fromRef) rs
in go (add ++ c) rest
go c (AddRefLog : rest) = do
reflog <- getreflog
go (reflog ++ c) rest
go c (RemoveMatching g : rest) =
go (filter (not . matchGlob g . fromRef) c) rest
go (filter (not . matchGlob g . decodeBS' . fromRef) c) rest

View file

@ -1,7 +1,7 @@
{- System.Process enhancements, including additional ways of running
- processes, and logging.
-
- Copyright 2012-2015 Joey Hess <id@joeyh.name>
- Copyright 2012-2020 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
@ -53,6 +53,7 @@ import System.Log.Logger
import Control.Concurrent
import qualified Control.Exception as E
import Control.Monad
import qualified Data.ByteString as S
type CreateProcessRunner = forall a. CreateProcess -> ((Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle) -> IO a) -> IO a
@ -85,25 +86,20 @@ writeReadProcessEnv
-> [String]
-> Maybe [(String, String)]
-> (Maybe (Handle -> IO ()))
-> (Maybe (Handle -> IO ()))
-> IO String
writeReadProcessEnv cmd args environ writestdin adjusthandle = do
-> IO S.ByteString
writeReadProcessEnv cmd args environ writestdin = do
(Just inh, Just outh, _, pid) <- createProcess p
maybe (return ()) (\a -> a inh) adjusthandle
maybe (return ()) (\a -> a outh) adjusthandle
-- fork off a thread to start consuming the output
output <- hGetContents outh
outMVar <- newEmptyMVar
_ <- forkIO $ E.evaluate (length output) >> putMVar outMVar ()
_ <- forkIO $ putMVar outMVar =<< S.hGetContents outh
-- now write and flush any input
maybe (return ()) (\a -> a inh >> hFlush inh) writestdin
hClose inh -- done with stdin
-- wait on the output
takeMVar outMVar
output <- takeMVar outMVar
hClose outh
-- wait on the process