68257e9076
filter-process: New command that can make git add/checkout faster when there are a lot of unlocked annexed files or non-annexed files, but that also makes git add of large annexed files slower. Use it by running: git config filter.annex.process 'git-annex filter-process' Fully tested and working, but I have not benchmarked it at all. And, incremental hashing is not done when git add uses it, so extra work is done in that case. Sponsored-by: Mark Reidenbach on Patreon
89 lines
2.8 KiB
Haskell
89 lines
2.8 KiB
Haskell
{- git-annex command
|
|
-
|
|
- Copyright 2021 Joey Hess <id@joeyh.name>
|
|
-
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
-}
|
|
|
|
module Command.FilterProcess where
|
|
|
|
import Command
|
|
import qualified Command.Smudge
|
|
import Git.FilterProcess
|
|
import Git.PktLine
|
|
import Annex.Link
|
|
|
|
import qualified Data.ByteString as B
|
|
import qualified Data.ByteString.Lazy as L
|
|
|
|
cmd :: Command
|
|
cmd = noCommit $ noMessages $
|
|
command "filter-process" SectionPlumbing
|
|
"long running git filter process"
|
|
paramNothing (withParams seek)
|
|
|
|
seek :: CmdParams -> CommandSeek
|
|
seek _ = liftIO longRunningFilterProcessHandshake >>= \case
|
|
Left err -> giveup err
|
|
Right () -> go
|
|
where
|
|
go = liftIO getFilterRequest >>= \case
|
|
Just (Smudge f) -> do
|
|
smudge f
|
|
go
|
|
Just (Clean f) -> do
|
|
clean f
|
|
go
|
|
Nothing -> return ()
|
|
|
|
smudge :: FilePath -> Annex ()
|
|
smudge file = do
|
|
{- The whole git file content is necessarily buffered in memory,
|
|
- because we have to consume everything git is sending before
|
|
- we can respond to it. An annexed file will be only a pointer
|
|
- though. -}
|
|
b <- B.concat . map pktLineToByteString <$> liftIO readUntilFlushPkt
|
|
Command.Smudge.smudge' file (L.fromStrict b)
|
|
{- Git expects us to output the content of unlocked annexed files,
|
|
- but if we got a pointer, we output only the pointer.
|
|
- See Command.Smudge.smudge for details of how this works. -}
|
|
liftIO $ respondFilterRequest b
|
|
|
|
clean :: FilePath -> Annex ()
|
|
clean file = do
|
|
{- We have to consume everything git is sending before we can
|
|
- respond to it. But it can be an arbitrarily large file,
|
|
- which is being added to the annex, and we do not want to buffer
|
|
- all that in memory.
|
|
-
|
|
- Start by reading enough to determine if the file is an annex
|
|
- pointer.
|
|
-}
|
|
let conv b l = (B.concat (map pktLineToByteString l), b)
|
|
(b, readcomplete) <-
|
|
either (conv False) (conv True)
|
|
<$> liftIO (readUntilFlushPktOrSize unpaddedMaxPointerSz)
|
|
|
|
let passthrough
|
|
| readcomplete = liftIO $ respondFilterRequest b
|
|
| otherwise = liftIO $ do
|
|
-- Have to buffer the file content in memory here,
|
|
-- but it's not an annexed file, so not typically
|
|
-- large, and it's all stored in git, which also
|
|
-- buffers files in memory.
|
|
b' <- B.concat . (map pktLineToByteString)
|
|
<$> readUntilFlushPkt
|
|
respondFilterRequest (b <> b')
|
|
let discardreststdin
|
|
| readcomplete = return ()
|
|
| otherwise = liftIO discardUntilFlushPkt
|
|
let emitpointer = liftIO . respondFilterRequest . formatPointer
|
|
-- This does not incrementally hash, so both git and git-annex
|
|
-- read from the file. It may be less expensive to incrementally
|
|
-- hash the content provided by git, but Backend does not currently
|
|
-- have an interface to do so.
|
|
Command.Smudge.clean' (toRawFilePath file)
|
|
(parseLinkTargetOrPointer b)
|
|
passthrough
|
|
discardreststdin
|
|
emitpointer
|