add git-annex filter-process
filter-process: New command that can make git add/checkout faster when there are a lot of unlocked annexed files or non-annexed files, but that also makes git add of large annexed files slower. Use it by running: git config filter.annex.process 'git-annex filter-process' Fully tested and working, but I have not benchmarked it at all. And, incremental hashing is not done when git add uses it, so extra work is done in that case. Sponsored-by: Mark Reidenbach on Patreon
This commit is contained in:
parent
d706b49979
commit
68257e9076
10 changed files with 254 additions and 43 deletions
89
Command/FilterProcess.hs
Normal file
89
Command/FilterProcess.hs
Normal file
|
@ -0,0 +1,89 @@
|
|||
{- git-annex command
|
||||
-
|
||||
- Copyright 2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Command.FilterProcess where
|
||||
|
||||
import Command
|
||||
import qualified Command.Smudge
|
||||
import Git.FilterProcess
|
||||
import Git.PktLine
|
||||
import Annex.Link
|
||||
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
cmd :: Command
|
||||
cmd = noCommit $ noMessages $
|
||||
command "filter-process" SectionPlumbing
|
||||
"long running git filter process"
|
||||
paramNothing (withParams seek)
|
||||
|
||||
seek :: CmdParams -> CommandSeek
|
||||
seek _ = liftIO longRunningFilterProcessHandshake >>= \case
|
||||
Left err -> giveup err
|
||||
Right () -> go
|
||||
where
|
||||
go = liftIO getFilterRequest >>= \case
|
||||
Just (Smudge f) -> do
|
||||
smudge f
|
||||
go
|
||||
Just (Clean f) -> do
|
||||
clean f
|
||||
go
|
||||
Nothing -> return ()
|
||||
|
||||
smudge :: FilePath -> Annex ()
|
||||
smudge file = do
|
||||
{- The whole git file content is necessarily buffered in memory,
|
||||
- because we have to consume everything git is sending before
|
||||
- we can respond to it. An annexed file will be only a pointer
|
||||
- though. -}
|
||||
b <- B.concat . map pktLineToByteString <$> liftIO readUntilFlushPkt
|
||||
Command.Smudge.smudge' file (L.fromStrict b)
|
||||
{- Git expects us to output the content of unlocked annexed files,
|
||||
- but if we got a pointer, we output only the pointer.
|
||||
- See Command.Smudge.smudge for details of how this works. -}
|
||||
liftIO $ respondFilterRequest b
|
||||
|
||||
clean :: FilePath -> Annex ()
|
||||
clean file = do
|
||||
{- We have to consume everything git is sending before we can
|
||||
- respond to it. But it can be an arbitrarily large file,
|
||||
- which is being added to the annex, and we do not want to buffer
|
||||
- all that in memory.
|
||||
-
|
||||
- Start by reading enough to determine if the file is an annex
|
||||
- pointer.
|
||||
-}
|
||||
let conv b l = (B.concat (map pktLineToByteString l), b)
|
||||
(b, readcomplete) <-
|
||||
either (conv False) (conv True)
|
||||
<$> liftIO (readUntilFlushPktOrSize unpaddedMaxPointerSz)
|
||||
|
||||
let passthrough
|
||||
| readcomplete = liftIO $ respondFilterRequest b
|
||||
| otherwise = liftIO $ do
|
||||
-- Have to buffer the file content in memory here,
|
||||
-- but it's not an annexed file, so not typically
|
||||
-- large, and it's all stored in git, which also
|
||||
-- buffers files in memory.
|
||||
b' <- B.concat . (map pktLineToByteString)
|
||||
<$> readUntilFlushPkt
|
||||
respondFilterRequest (b <> b')
|
||||
let discardreststdin
|
||||
| readcomplete = return ()
|
||||
| otherwise = liftIO discardUntilFlushPkt
|
||||
let emitpointer = liftIO . respondFilterRequest . formatPointer
|
||||
-- This does not incrementally hash, so both git and git-annex
|
||||
-- read from the file. It may be less expensive to incrementally
|
||||
-- hash the content provided by git, but Backend does not currently
|
||||
-- have an interface to do so.
|
||||
Command.Smudge.clean' (toRawFilePath file)
|
||||
(parseLinkTargetOrPointer b)
|
||||
passthrough
|
||||
discardreststdin
|
||||
emitpointer
|
Loading…
Add table
Add a link
Reference in a new issue