git-annex/Git/FilterProcess.hs
Joey Hess 68257e9076
add git-annex filter-process
filter-process: New command that can make git add/checkout faster when
there are a lot of unlocked annexed files or non-annexed files, but that
also makes git add of large annexed files slower.

Use it by running: git
config filter.annex.process 'git-annex filter-process'

Fully tested and working, but I have not benchmarked it at all.
And, incremental hashing is not done when git add uses it, so extra work is
done in that case.

Sponsored-by: Mark Reidenbach on Patreon
2021-11-04 15:02:36 -04:00

205 lines
6.5 KiB
Haskell

{- git long-running filter process
-
- As documented in git's gitattributes(5) and
- Documentation/technical/long-running-process-protocol.txt
-
- Copyright 2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Git.FilterProcess (
WelcomeMessage(..),
Version(..),
Capability(..),
readUntilFlushPkt,
readUntilFlushPktOrSize,
discardUntilFlushPkt,
longRunningProcessHandshake,
longRunningFilterProcessHandshake,
FilterRequest(..),
getFilterRequest,
respondFilterRequest,
) where
import Common
import Git.PktLine
import qualified Data.ByteString as B
{- This is a message like "git-filter-client" or "git-filter-server" -}
data WelcomeMessage = WelcomeMessage PktLine
deriving (Show)
{- Configuration message, eg "foo=bar" -}
data ConfigValue = ConfigValue String String
deriving (Show, Eq)
encodeConfigValue :: ConfigValue -> PktLine
encodeConfigValue (ConfigValue k v) = stringPktLine (k <> "=" <> v)
decodeConfigValue :: PktLine -> Maybe ConfigValue
decodeConfigValue pktline =
let t = pktLineToString pktline
(k, v) = break (== '=') t
in if null v
then Nothing
else Just $ ConfigValue k (drop 1 v)
extractConfigValue :: [ConfigValue] -> String -> Maybe String
extractConfigValue [] _ = Nothing
extractConfigValue (ConfigValue k v:cs) wantk
| k == wantk = Just v
| otherwise = extractConfigValue cs wantk
data Version = Version Int
deriving (Show, Eq)
encodeVersion :: Version -> PktLine
encodeVersion (Version n) = encodeConfigValue $ ConfigValue "version" (show n)
decodeVersion :: PktLine -> Maybe Version
decodeVersion pktline = decodeConfigValue pktline >>= \case
ConfigValue "version" v -> Version <$> readish v
_ -> Nothing
data Capability = Capability String
deriving (Show, Eq)
encodeCapability :: Capability -> PktLine
encodeCapability (Capability c) = encodeConfigValue $
ConfigValue "capability" c
decodeCapability :: PktLine -> Maybe Capability
decodeCapability pktline = decodeConfigValue pktline >>= \case
ConfigValue "capability" c -> Just $ Capability c
_ -> Nothing
{- Reads PktLines until a flushPkt (or EOF),
- and returns all except the flushPkt -}
readUntilFlushPkt :: IO [PktLine]
readUntilFlushPkt = go []
where
go l = readPktLine stdin >>= \case
Just pktline | not (isFlushPkt pktline) -> go (pktline:l)
_ -> return (reverse l)
{- Reads PktLines until at least the specified number of bytes have been
- read, or until a flushPkt (or EOF). Returns Right if it did read a
- flushPkt/EOF, and Left if there is still content leftover that needs to
- be read. -}
readUntilFlushPktOrSize :: Int -> IO (Either [PktLine] [PktLine])
readUntilFlushPktOrSize = go []
where
go l n = readPktLine stdin >>= \case
Just pktline
| isFlushPkt pktline -> return (Right (reverse l))
| otherwise ->
let len = B.length (pktLineToByteString pktline)
n' = n - len
in if n' <= 0
then return (Left (reverse (pktline:l)))
else go (pktline:l) n'
Nothing -> return (Right (reverse l))
{- Reads PktLines until a flushPkt (or EOF), and throws them away. -}
discardUntilFlushPkt :: IO ()
discardUntilFlushPkt = readPktLine stdin >>= \case
Just pktline | isFlushPkt pktline -> return ()
Nothing -> return ()
_ -> discardUntilFlushPkt
longRunningProcessHandshake
:: (WelcomeMessage -> Maybe WelcomeMessage)
-> ([Version] -> [Version])
-> ([Capability] -> [Capability])
-> IO (Either String ())
longRunningProcessHandshake respwelcomemessage filterversions filtercapabilities =
readUntilFlushPkt >>= \case
[] -> protoerr "no welcome message"
(welcomemessage:versions) ->
checkwelcomemessage welcomemessage $
checkversion versions $ do
capabilities <- readUntilFlushPkt
checkcapabilities capabilities success
where
protoerr msg = return $ Left $ "git protocol error: " ++ msg
success = return (Right ())
checkwelcomemessage welcomemessage cont =
case respwelcomemessage (WelcomeMessage welcomemessage) of
Nothing -> protoerr "unsupported welcome message"
Just (WelcomeMessage welcomemessage') -> do
writePktLine stdout welcomemessage'
cont
checkversion versions cont = do
let versions' = filterversions (mapMaybe decodeVersion versions)
if null versions'
then protoerr "unsupported protocol version"
else do
forM_ versions' $ \v ->
writePktLine stdout $ encodeVersion v
writePktLine stdout flushPkt
cont
checkcapabilities capabilities cont = do
let capabilities' = filtercapabilities (mapMaybe decodeCapability capabilities)
if null capabilities'
then protoerr "unsupported protocol capabilities"
else do
forM_ capabilities' $ \c ->
writePktLine stdout $ encodeCapability c
writePktLine stdout flushPkt
cont
longRunningFilterProcessHandshake :: IO (Either String ())
longRunningFilterProcessHandshake =
longRunningProcessHandshake respwelcomemessage filterversions filtercapabilities
where
respwelcomemessage (WelcomeMessage w)
| pktLineToString w == "git-filter-client" =
Just $ WelcomeMessage $ stringPktLine "git-filter-server"
| otherwise = Nothing
filterversions = filter (== Version 2)
-- Delay capability is not implemented, so filter it out.
filtercapabilities = filter (`elem` [Capability "smudge", Capability "clean"])
data FilterRequest = Smudge FilePath | Clean FilePath
deriving (Show, Eq)
{- Waits for the next FilterRequest to be received. Does not read
- the content to be filtered, which is what gets sent subsequent to the
- FilterRequest. Use eg readUntilFlushPkt to read it, before calling
- respondFilterRequest. -}
getFilterRequest :: IO (Maybe FilterRequest)
getFilterRequest = do
ps <- readUntilFlushPkt
let cs = mapMaybe decodeConfigValue ps
case (extractConfigValue cs "command", extractConfigValue cs "pathname") of
(Just command, Just pathname)
| command == "smudge" -> return $ Just $ Smudge pathname
| command == "clean" -> return $ Just $ Clean pathname
| otherwise -> return Nothing
_ -> return Nothing
{- Send a response to a FilterRequest, consisting of the filtered content. -}
respondFilterRequest :: B.ByteString -> IO ()
respondFilterRequest b = do
writePktLine stdout $ encodeConfigValue $ ConfigValue "status" "success"
writePktLine stdout flushPkt
send b
-- The protocol allows for another list of ConfigValues to be sent
-- here, but we don't use it. Send another flushPkt to terminate
-- the empty list.
writePktLine stdout flushPkt
where
send b' =
let (pktline, rest) = encodePktLine b'
in do
writePktLine stdout pktline
case rest of
Just b'' -> send b''
Nothing -> writePktLine stdout flushPkt