optimise catfile interface with ByteString and Attoparsec
Around 3% total speedup. Profiling git annex find --not --in web, it's now bytestring end-to-end, and there is only a little added overhead in eg accessing the Annex state MVar (3%). The rest of the runtime is spent reading symlinks, and in attoparsec. This feels like the end of the optimisation road, without a major change like caching information for faster queries.
This commit is contained in:
parent
2caf579718
commit
86426036a0
3 changed files with 31 additions and 23 deletions
|
@ -3,7 +3,7 @@ git-annex (8.20200331) UNRELEASED; urgency=medium
|
||||||
* Improve git-annex's ability to find the path to its program,
|
* Improve git-annex's ability to find the path to its program,
|
||||||
especially when it needs to run itself in another repo to upgrade it.
|
especially when it needs to run itself in another repo to upgrade it.
|
||||||
* adb: Better messages when the adb command is not installed.
|
* adb: Better messages when the adb command is not installed.
|
||||||
* Sped up query commands that read the git-annex branch by around 6%.
|
* Sped up query commands that read the git-annex branch by around 9%.
|
||||||
* Various speed improvements gained by using ByteStrings for git refs and
|
* Various speed improvements gained by using ByteStrings for git refs and
|
||||||
shas.
|
shas.
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- git cat-file interface
|
{- git cat-file interface
|
||||||
-
|
-
|
||||||
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
|
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -24,6 +24,9 @@ module Git.CatFile (
|
||||||
import System.IO
|
import System.IO
|
||||||
import qualified Data.ByteString as S
|
import qualified Data.ByteString as S
|
||||||
import qualified Data.ByteString.Lazy as L
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
import qualified Data.ByteString.Char8 as S8
|
||||||
|
import qualified Data.Attoparsec.ByteString as A
|
||||||
|
import qualified Data.Attoparsec.ByteString.Char8 as A8
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
import Data.String
|
import Data.String
|
||||||
import Data.Char
|
import Data.Char
|
||||||
|
@ -83,9 +86,9 @@ catObject h object = maybe L.empty fst3 <$> catObjectDetails h object
|
||||||
|
|
||||||
catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha, ObjectType))
|
catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha, ObjectType))
|
||||||
catObjectDetails h object = query (catFileProcess h) object newlinefallback $ \from -> do
|
catObjectDetails h object = query (catFileProcess h) object newlinefallback $ \from -> do
|
||||||
header <- hGetLine from
|
header <- S8.hGetLine from
|
||||||
case parseResp object header of
|
case parseResp object header of
|
||||||
Just (ParsedResp sha size objtype) -> do
|
Just (ParsedResp sha objtype size) -> do
|
||||||
content <- S.hGet from (fromIntegral size)
|
content <- S.hGet from (fromIntegral size)
|
||||||
eatchar '\n' from
|
eatchar '\n' from
|
||||||
return $ Just (L.fromChunks [content], sha, objtype)
|
return $ Just (L.fromChunks [content], sha, objtype)
|
||||||
|
@ -113,9 +116,9 @@ catObjectDetails h object = query (catFileProcess h) object newlinefallback $ \f
|
||||||
{- Gets the size and type of an object, without reading its content. -}
|
{- Gets the size and type of an object, without reading its content. -}
|
||||||
catObjectMetaData :: CatFileHandle -> Ref -> IO (Maybe (Sha, FileSize, ObjectType))
|
catObjectMetaData :: CatFileHandle -> Ref -> IO (Maybe (Sha, FileSize, ObjectType))
|
||||||
catObjectMetaData h object = query (checkFileProcess h) object newlinefallback $ \from -> do
|
catObjectMetaData h object = query (checkFileProcess h) object newlinefallback $ \from -> do
|
||||||
resp <- hGetLine from
|
resp <- S8.hGetLine from
|
||||||
case parseResp object resp of
|
case parseResp object resp of
|
||||||
Just (ParsedResp sha size objtype) ->
|
Just (ParsedResp sha objtype size) ->
|
||||||
return $ Just (sha, size, objtype)
|
return $ Just (sha, size, objtype)
|
||||||
Just DNE -> return Nothing
|
Just DNE -> return Nothing
|
||||||
Nothing -> error $ "unknown response from git cat-file " ++ show (resp, object)
|
Nothing -> error $ "unknown response from git cat-file " ++ show (resp, object)
|
||||||
|
@ -127,35 +130,39 @@ catObjectMetaData h object = query (checkFileProcess h) object newlinefallback $
|
||||||
objtype <- queryObjectType object (gitRepo h)
|
objtype <- queryObjectType object (gitRepo h)
|
||||||
return $ (,,) <$> sha <*> sz <*> objtype
|
return $ (,,) <$> sha <*> sz <*> objtype
|
||||||
|
|
||||||
data ParsedResp = ParsedResp Sha FileSize ObjectType | DNE
|
data ParsedResp = ParsedResp Sha ObjectType FileSize | DNE
|
||||||
|
deriving (Show)
|
||||||
|
|
||||||
query :: CoProcess.CoProcessHandle -> Ref -> IO a -> (Handle -> IO a) -> IO a
|
query :: CoProcess.CoProcessHandle -> Ref -> IO a -> (Handle -> IO a) -> IO a
|
||||||
query hdl object newlinefallback receive
|
query hdl object newlinefallback receive
|
||||||
-- git cat-file --batch uses a line based protocol, so when the
|
-- git cat-file --batch uses a line based protocol, so when the
|
||||||
-- filename itself contains a newline, have to fall back to another
|
-- filename itself contains a newline, have to fall back to another
|
||||||
-- method of getting the information.
|
-- method of getting the information.
|
||||||
| '\n' `elem` s = newlinefallback
|
| '\n' `S8.elem` s = newlinefallback
|
||||||
-- git strips carriage return from the end of a line, out of some
|
-- git strips carriage return from the end of a line, out of some
|
||||||
-- misplaced desire to support windows, so also use the newline
|
-- misplaced desire to support windows, so also use the newline
|
||||||
-- fallback for those.
|
-- fallback for those.
|
||||||
| "\r" `isSuffixOf` s = newlinefallback
|
| "\r" `S8.isSuffixOf` s = newlinefallback
|
||||||
| otherwise = CoProcess.query hdl send receive
|
| otherwise = CoProcess.query hdl send receive
|
||||||
where
|
where
|
||||||
send to = hPutStrLn to s
|
send to = S8.hPutStrLn to s
|
||||||
s = fromRef object
|
s = fromRef' object
|
||||||
|
|
||||||
parseResp :: Ref -> String -> Maybe ParsedResp
|
parseResp :: Ref -> S.ByteString -> Maybe ParsedResp
|
||||||
parseResp object l
|
parseResp object s
|
||||||
| " missing" `isSuffixOf` l -- less expensive than full check
|
| " missing" `S.isSuffixOf` s -- less expensive than full check
|
||||||
&& l == fromRef object ++ " missing" = Just DNE
|
&& s == fromRef' object <> " missing" = Just DNE
|
||||||
| otherwise = case words l of
|
| otherwise = eitherToMaybe $ A.parseOnly respParser s
|
||||||
[sha, objtype, size] -> case extractSha (encodeBS sha) of
|
|
||||||
Just sha' -> case (readObjectType (encodeBS objtype), reads size) of
|
respParser :: A.Parser ParsedResp
|
||||||
(Just t, [(bytes, "")]) ->
|
respParser = ParsedResp
|
||||||
Just $ ParsedResp sha' bytes t
|
<$> (maybe (fail "bad sha") return . extractSha =<< nextword)
|
||||||
_ -> Nothing
|
<* A8.char ' '
|
||||||
Nothing -> Nothing
|
<*> (maybe (fail "bad object type") return . readObjectType =<< nextword)
|
||||||
_ -> Nothing
|
<* A8.char ' '
|
||||||
|
<*> A8.decimal
|
||||||
|
where
|
||||||
|
nextword = A8.takeTill (== ' ')
|
||||||
|
|
||||||
querySingle :: CommandParam -> Ref -> Repo -> (Handle -> IO a) -> IO (Maybe a)
|
querySingle :: CommandParam -> Ref -> Repo -> (Handle -> IO a) -> IO (Maybe a)
|
||||||
querySingle o r repo reader = assertLocal repo $
|
querySingle o r repo reader = assertLocal repo $
|
||||||
|
|
|
@ -101,6 +101,7 @@ newtype RefDate = RefDate String
|
||||||
|
|
||||||
{- Types of objects that can be stored in git. -}
|
{- Types of objects that can be stored in git. -}
|
||||||
data ObjectType = BlobObject | CommitObject | TreeObject
|
data ObjectType = BlobObject | CommitObject | TreeObject
|
||||||
|
deriving (Show)
|
||||||
|
|
||||||
readObjectType :: S.ByteString -> Maybe ObjectType
|
readObjectType :: S.ByteString -> Maybe ObjectType
|
||||||
readObjectType "blob" = Just BlobObject
|
readObjectType "blob" = Just BlobObject
|
||||||
|
|
Loading…
Reference in a new issue