Avoid git status taking a long time after git-annex unlock of many files.
Implemented by making Git.Queue have a FlushAction, which can accumulate along with another action on files, and runs only once the other action has run. This lets git-annex unlock queue up git update-index actions, without conflicting with the restagePointerFiles FlushActions. In a repository with filter-process enabled, git-annex unlock will often not take any more time than before, though it may when the files are large. Either way, it should always slow down less than git status speeds up. When filter-process is not enabled, git-annex unlock will slow down as much as git status speeds up. Sponsored-by: Jochen Bartl on Patreon
This commit is contained in:
parent
c68f52c6a2
commit
faf84aa5c2
6 changed files with 76 additions and 63 deletions
|
@ -190,7 +190,7 @@ restagePointerFile (Restage True) f orig = withTSDelta $ \tsd ->
|
||||||
-- fails on "../../repo/path/file" when cwd is not in the repo
|
-- fails on "../../repo/path/file" when cwd is not in the repo
|
||||||
-- being acted on. Avoid these problems with an absolute path.
|
-- being acted on. Avoid these problems with an absolute path.
|
||||||
absf <- liftIO $ absPath f
|
absf <- liftIO $ absPath f
|
||||||
Annex.Queue.addInternalAction runner [(absf, isunmodified tsd, inodeCacheFileSize orig)]
|
Annex.Queue.addFlushAction runner [(absf, isunmodified tsd, inodeCacheFileSize orig)]
|
||||||
where
|
where
|
||||||
isunmodified tsd = genInodeCache f tsd >>= return . \case
|
isunmodified tsd = genInodeCache f tsd >>= return . \case
|
||||||
Nothing -> False
|
Nothing -> False
|
||||||
|
@ -202,8 +202,8 @@ restagePointerFile (Restage True) f orig = withTSDelta $ \tsd ->
|
||||||
-- on all still-unmodified files, using a copy of the index file,
|
-- on all still-unmodified files, using a copy of the index file,
|
||||||
-- to bypass the lock. Then replace the old index file with the new
|
-- to bypass the lock. Then replace the old index file with the new
|
||||||
-- updated index file.
|
-- updated index file.
|
||||||
runner :: Git.Queue.InternalActionRunner Annex
|
runner :: Git.Queue.FlushActionRunner Annex
|
||||||
runner = Git.Queue.InternalActionRunner "restagePointerFile" $ \r l -> do
|
runner = Git.Queue.FlushActionRunner "restagePointerFile" $ \r l -> do
|
||||||
-- Flush any queued changes to the keys database, so they
|
-- Flush any queued changes to the keys database, so they
|
||||||
-- are visible to child processes.
|
-- are visible to child processes.
|
||||||
-- The database is closed because that may improve behavior
|
-- The database is closed because that may improve behavior
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
|
|
||||||
module Annex.Queue (
|
module Annex.Queue (
|
||||||
addCommand,
|
addCommand,
|
||||||
addInternalAction,
|
addFlushAction,
|
||||||
addUpdateIndex,
|
addUpdateIndex,
|
||||||
flush,
|
flush,
|
||||||
flushWhenFull,
|
flushWhenFull,
|
||||||
|
@ -31,11 +31,11 @@ addCommand commonparams command params files = do
|
||||||
store =<< flushWhenFull =<<
|
store =<< flushWhenFull =<<
|
||||||
(Git.Queue.addCommand commonparams command params files q =<< gitRepo)
|
(Git.Queue.addCommand commonparams command params files q =<< gitRepo)
|
||||||
|
|
||||||
addInternalAction :: Git.Queue.InternalActionRunner Annex -> [(RawFilePath, IO Bool, FileSize)] -> Annex ()
|
addFlushAction :: Git.Queue.FlushActionRunner Annex -> [(RawFilePath, IO Bool, FileSize)] -> Annex ()
|
||||||
addInternalAction runner files = do
|
addFlushAction runner files = do
|
||||||
q <- get
|
q <- get
|
||||||
store =<< flushWhenFull =<<
|
store =<< flushWhenFull =<<
|
||||||
(Git.Queue.addInternalAction runner files q =<< gitRepo)
|
(Git.Queue.addFlushAction runner files q =<< gitRepo)
|
||||||
|
|
||||||
{- Adds an update-index stream to the queue. -}
|
{- Adds an update-index stream to the queue. -}
|
||||||
addUpdateIndex :: Git.UpdateIndex.Streamer -> Annex ()
|
addUpdateIndex :: Git.UpdateIndex.Streamer -> Annex ()
|
||||||
|
|
|
@ -10,6 +10,7 @@ git-annex (10.20220128) UNRELEASED; urgency=medium
|
||||||
* registerurl, unregisterurl: Improved output when reading from stdin
|
* registerurl, unregisterurl: Improved output when reading from stdin
|
||||||
to be more like other batch commands.
|
to be more like other batch commands.
|
||||||
* registerurl, unregisterurl: Added --json and --json-error-messages options.
|
* registerurl, unregisterurl: Added --json and --json-error-messages options.
|
||||||
|
* Avoid git status taking a long time after git-annex unlock of many files.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Mon, 31 Jan 2022 13:14:42 -0400
|
-- Joey Hess <id@joeyh.name> Mon, 31 Jan 2022 13:14:42 -0400
|
||||||
|
|
||||||
|
|
87
Git/Queue.hs
87
Git/Queue.hs
|
@ -1,6 +1,6 @@
|
||||||
{- git repository command queue
|
{- git repository command queue
|
||||||
-
|
-
|
||||||
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
|
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -13,8 +13,8 @@ module Git.Queue (
|
||||||
defaultTimelimit,
|
defaultTimelimit,
|
||||||
addCommand,
|
addCommand,
|
||||||
addUpdateIndex,
|
addUpdateIndex,
|
||||||
addInternalAction,
|
addFlushAction,
|
||||||
InternalActionRunner(..),
|
FlushActionRunner(..),
|
||||||
size,
|
size,
|
||||||
full,
|
full,
|
||||||
flush,
|
flush,
|
||||||
|
@ -48,30 +48,34 @@ data Action m
|
||||||
-- ^ parameters that come after the git subcommand
|
-- ^ parameters that come after the git subcommand
|
||||||
, getFiles :: [CommandParam]
|
, getFiles :: [CommandParam]
|
||||||
}
|
}
|
||||||
{- An internal action to run, on a list of files that can be added
|
{- A FlushAction can be added along with CommandActions or
|
||||||
- to as the queue grows. -}
|
- UpdateIndexActions, and when the queue later gets flushed,
|
||||||
| InternalAction
|
- those will be run before the FlushAction is. -}
|
||||||
{ getRunner :: InternalActionRunner m
|
| FlushAction
|
||||||
, getInternalFiles :: [(RawFilePath, IO Bool, FileSize)]
|
{ getFlushActionRunner :: FlushActionRunner m
|
||||||
|
, getFlushActionFiles :: [(RawFilePath, IO Bool, FileSize)]
|
||||||
}
|
}
|
||||||
|
|
||||||
{- The String must be unique for each internal action. -}
|
{- The String must be unique for each flush action. -}
|
||||||
data InternalActionRunner m = InternalActionRunner String (Repo -> [(RawFilePath, IO Bool, FileSize)] -> m ())
|
data FlushActionRunner m = FlushActionRunner String (Repo -> [(RawFilePath, IO Bool, FileSize)] -> m ())
|
||||||
|
|
||||||
instance Eq (InternalActionRunner m) where
|
instance Eq (FlushActionRunner m) where
|
||||||
InternalActionRunner s1 _ == InternalActionRunner s2 _ = s1 == s2
|
FlushActionRunner s1 _ == FlushActionRunner s2 _ = s1 == s2
|
||||||
|
|
||||||
{- A key that can uniquely represent an action in a Map. -}
|
{- A key that can uniquely represent an action in a Map.
|
||||||
|
-
|
||||||
|
- The ordering controls what order the actions are run in when flushing
|
||||||
|
- the queue. -}
|
||||||
data ActionKey
|
data ActionKey
|
||||||
= UpdateIndexActionKey
|
= UpdateIndexActionKey
|
||||||
| CommandActionKey [CommandParam] String [CommandParam]
|
| CommandActionKey [CommandParam] String [CommandParam]
|
||||||
| InternalActionKey String
|
| FlushActionKey String
|
||||||
deriving (Eq, Ord)
|
deriving (Eq, Ord)
|
||||||
|
|
||||||
actionKey :: Action m -> ActionKey
|
actionKey :: Action m -> ActionKey
|
||||||
actionKey (UpdateIndexAction _) = UpdateIndexActionKey
|
actionKey (UpdateIndexAction _) = UpdateIndexActionKey
|
||||||
actionKey CommandAction { getCommonParams = c, getSubcommand = s, getParams = p } = CommandActionKey c s p
|
actionKey CommandAction { getCommonParams = c, getSubcommand = s, getParams = p } = CommandActionKey c s p
|
||||||
actionKey InternalAction { getRunner = InternalActionRunner s _ } = InternalActionKey s
|
actionKey FlushAction { getFlushActionRunner = FlushActionRunner s _ } = FlushActionKey s
|
||||||
|
|
||||||
{- A queue of actions to perform (in any order) on a git repository,
|
{- A queue of actions to perform (in any order) on a git repository,
|
||||||
- with lists of files to perform them on. This allows coalescing
|
- with lists of files to perform them on. This allows coalescing
|
||||||
|
@ -120,7 +124,7 @@ new lim tlim = do
|
||||||
-}
|
-}
|
||||||
addCommand :: MonadIO m => [CommandParam] -> String -> [CommandParam] -> [FilePath] -> Queue m -> Repo -> m (Queue m)
|
addCommand :: MonadIO m => [CommandParam] -> String -> [CommandParam] -> [FilePath] -> Queue m -> Repo -> m (Queue m)
|
||||||
addCommand commonparams subcommand params files q repo =
|
addCommand commonparams subcommand params files q repo =
|
||||||
updateQueue action different (length files) q repo
|
updateQueue action conflicting (length files) q repo
|
||||||
where
|
where
|
||||||
action = CommandAction
|
action = CommandAction
|
||||||
{ getCommonParams = commonparams
|
{ getCommonParams = commonparams
|
||||||
|
@ -129,36 +133,37 @@ addCommand commonparams subcommand params files q repo =
|
||||||
, getFiles = map File files
|
, getFiles = map File files
|
||||||
}
|
}
|
||||||
|
|
||||||
different (CommandAction { getSubcommand = s }) = s /= subcommand
|
conflicting (CommandAction { getSubcommand = s }) = s /= subcommand
|
||||||
different _ = True
|
conflicting (FlushAction {}) = False
|
||||||
|
conflicting _ = True
|
||||||
|
|
||||||
{- Adds an internal action to the queue. -}
|
{- Adds a flush action to the queue. This can co-exist with anything else
|
||||||
addInternalAction :: MonadIO m => InternalActionRunner m -> [(RawFilePath, IO Bool, FileSize)] -> Queue m -> Repo -> m (Queue m)
|
- that gets added to the queue, and when the queue is eventually flushed,
|
||||||
addInternalAction runner files q repo =
|
- it will be run after the other things in the queue. -}
|
||||||
updateQueue action different (length files) q repo
|
addFlushAction :: MonadIO m => FlushActionRunner m -> [(RawFilePath, IO Bool, FileSize)] -> Queue m -> Repo -> m (Queue m)
|
||||||
|
addFlushAction runner files q repo =
|
||||||
|
updateQueue action (const False) (length files) q repo
|
||||||
where
|
where
|
||||||
action = InternalAction
|
action = FlushAction
|
||||||
{ getRunner = runner
|
{ getFlushActionRunner = runner
|
||||||
, getInternalFiles = files
|
, getFlushActionFiles = files
|
||||||
}
|
}
|
||||||
|
|
||||||
different (InternalAction { getRunner = r }) = r /= runner
|
|
||||||
different _ = True
|
|
||||||
|
|
||||||
{- Adds an update-index streamer to the queue. -}
|
{- Adds an update-index streamer to the queue. -}
|
||||||
addUpdateIndex :: MonadIO m => Git.UpdateIndex.Streamer -> Queue m -> Repo -> m (Queue m)
|
addUpdateIndex :: MonadIO m => Git.UpdateIndex.Streamer -> Queue m -> Repo -> m (Queue m)
|
||||||
addUpdateIndex streamer q repo =
|
addUpdateIndex streamer q repo =
|
||||||
updateQueue action different 1 q repo
|
updateQueue action conflicting 1 q repo
|
||||||
where
|
where
|
||||||
-- the list is built in reverse order
|
-- the list is built in reverse order
|
||||||
action = UpdateIndexAction [streamer]
|
action = UpdateIndexAction [streamer]
|
||||||
|
|
||||||
different (UpdateIndexAction _) = False
|
conflicting (UpdateIndexAction _) = False
|
||||||
different _ = True
|
conflicting (FlushAction {}) = False
|
||||||
|
conflicting _ = True
|
||||||
|
|
||||||
{- Updates or adds an action in the queue.
|
{- Updates or adds an action in the queue.
|
||||||
-
|
-
|
||||||
- If the queue already contains a different action, it will be flushed
|
- If the queue already contains a conflicting action, it will be flushed
|
||||||
- before adding the action; this is to ensure that conflicting actions,
|
- before adding the action; this is to ensure that conflicting actions,
|
||||||
- like add and rm, are run in the right order.
|
- like add and rm, are run in the right order.
|
||||||
-
|
-
|
||||||
|
@ -166,19 +171,19 @@ addUpdateIndex streamer q repo =
|
||||||
- and the action will be run right away.
|
- and the action will be run right away.
|
||||||
-}
|
-}
|
||||||
updateQueue :: MonadIO m => Action m -> (Action m -> Bool) -> Int -> Queue m -> Repo -> m (Queue m)
|
updateQueue :: MonadIO m => Action m -> (Action m -> Bool) -> Int -> Queue m -> Repo -> m (Queue m)
|
||||||
updateQueue !action different sizeincrease q repo = do
|
updateQueue !action conflicting sizeincrease q repo = do
|
||||||
now <- liftIO getPOSIXTime
|
now <- liftIO getPOSIXTime
|
||||||
if now - (_lastchanged q) > _timelimit q
|
if now - (_lastchanged q) > _timelimit q
|
||||||
then if isdifferent
|
then if isconflicting
|
||||||
then do
|
then do
|
||||||
q' <- flush q repo
|
q' <- flush q repo
|
||||||
flush (mk q') repo
|
flush (mk q') repo
|
||||||
else flush (mk q) repo
|
else flush (mk q) repo
|
||||||
else if isdifferent
|
else if isconflicting
|
||||||
then mk <$> flush q repo
|
then mk <$> flush q repo
|
||||||
else return $ mk (q { _lastchanged = now })
|
else return $ mk (q { _lastchanged = now })
|
||||||
where
|
where
|
||||||
isdifferent = not (null (filter different (M.elems (items q))))
|
isconflicting = not (null (filter conflicting (M.elems (items q))))
|
||||||
mk q' = newq
|
mk q' = newq
|
||||||
where
|
where
|
||||||
!newq = q'
|
!newq = q'
|
||||||
|
@ -196,8 +201,8 @@ combineNewOld (CommandAction _cps1 _sc1 _ps1 fs1) (CommandAction cps2 sc2 ps2 fs
|
||||||
CommandAction cps2 sc2 ps2 (fs1++fs2)
|
CommandAction cps2 sc2 ps2 (fs1++fs2)
|
||||||
combineNewOld (UpdateIndexAction s1) (UpdateIndexAction s2) =
|
combineNewOld (UpdateIndexAction s1) (UpdateIndexAction s2) =
|
||||||
UpdateIndexAction (s1++s2)
|
UpdateIndexAction (s1++s2)
|
||||||
combineNewOld (InternalAction _r1 fs1) (InternalAction r2 fs2) =
|
combineNewOld (FlushAction _r1 fs1) (FlushAction r2 fs2) =
|
||||||
InternalAction r2 (fs1++fs2)
|
FlushAction r2 (fs1++fs2)
|
||||||
combineNewOld anew _aold = anew
|
combineNewOld anew _aold = anew
|
||||||
|
|
||||||
{- Merges the contents of the second queue into the first.
|
{- Merges the contents of the second queue into the first.
|
||||||
|
@ -257,6 +262,6 @@ runAction repo action@(CommandAction {}) = liftIO $ do
|
||||||
forceSuccessProcess p pid
|
forceSuccessProcess p pid
|
||||||
go _ _ _ _ _ = error "internal"
|
go _ _ _ _ _ = error "internal"
|
||||||
#endif
|
#endif
|
||||||
runAction repo action@(InternalAction {}) =
|
runAction repo action@(FlushAction {}) =
|
||||||
let InternalActionRunner _ runner = getRunner action
|
let FlushActionRunner _ runner = getFlushActionRunner action
|
||||||
in runner repo (getInternalFiles action)
|
in runner repo (getFlushActionFiles action)
|
||||||
|
|
|
@ -8,17 +8,6 @@ git annex filter-process
|
||||||
|
|
||||||
# DESCRIPTION
|
# DESCRIPTION
|
||||||
|
|
||||||
By default, `git-annex smudge` is used as the git filter driver.
|
|
||||||
This is an alternative way to accomplish the same thing, using git's
|
|
||||||
long-running filter process interface.
|
|
||||||
|
|
||||||
To enable using this, run:
|
|
||||||
|
|
||||||
git config filter.annex.process 'git-annex filter-process'
|
|
||||||
|
|
||||||
There will be no visible difference in behavior between enabling this and
|
|
||||||
not, besides changes in speed and memory use when using git.
|
|
||||||
|
|
||||||
When this is not enabled, each file that git wants to filter involves
|
When this is not enabled, each file that git wants to filter involves
|
||||||
starting up a new `git-annex smudge` process. Starting many such processes
|
starting up a new `git-annex smudge` process. Starting many such processes
|
||||||
for many files can be slow, and can make commands like `git checkout` and
|
for many files can be slow, and can make commands like `git checkout` and
|
||||||
|
@ -28,10 +17,25 @@ non-annexed files do slow it down.)
|
||||||
|
|
||||||
On the other hand when this is enabled, `git add` of a large file does an
|
On the other hand when this is enabled, `git add` of a large file does an
|
||||||
unnecessary extra read of the file, and pipes its contents into git-annex.
|
unnecessary extra read of the file, and pipes its contents into git-annex.
|
||||||
So if you enable this, it will be faster to use `git-annex add` to add
|
So when this is enabled, it will be faster to use `git-annex add` to add
|
||||||
large files to the annex, rather than `git add`. Other commands that
|
large files to the annex, rather than `git add`. Other commands that
|
||||||
add files, like `git commit -a`, are also impacted by this.
|
add files, like `git commit -a`, are also impacted by this.
|
||||||
|
|
||||||
|
This is used by default in git-annex repositories v9 and above, while
|
||||||
|
v8 repositories use `git-annex smudge` for backwards compatibility with
|
||||||
|
older versions of git-annex.
|
||||||
|
|
||||||
|
To enable this in a v8 repository, run:
|
||||||
|
|
||||||
|
git config filter.annex.process 'git-annex filter-process'
|
||||||
|
|
||||||
|
To disable it, you can just unset the config:
|
||||||
|
|
||||||
|
git config --unset filter.annex.process
|
||||||
|
|
||||||
|
There will be no visible difference in behavior between enabling this and
|
||||||
|
not, besides changes in speed and memory use when using git.
|
||||||
|
|
||||||
# OPTIONS
|
# OPTIONS
|
||||||
|
|
||||||
* The [[git-annex-common-options]](1) can be used.
|
* The [[git-annex-common-options]](1) can be used.
|
||||||
|
|
|
@ -27,10 +27,13 @@ commit -a`). Afterwards, `git status` then smudged it again, unsure why!
|
||||||
> slowed git-annex unlock. But git status did then avoid doing any more
|
> slowed git-annex unlock. But git status did then avoid doing any more
|
||||||
> smudging. It seems that each call to restagePointerFile is running
|
> smudging. It seems that each call to restagePointerFile is running
|
||||||
> git update-index, so still one git-annex smudge per file, rather
|
> git update-index, so still one git-annex smudge per file, rather
|
||||||
> than combining several together.
|
> than combining several together. In a v8 repo, the same amount of work
|
||||||
|
> is done either way. In v9+, this is actually slower than before when the
|
||||||
|
> files are small.
|
||||||
>
|
>
|
||||||
> That's because restagePointerFile uses the git queue, and unlock
|
> restagePointerFile was running git update-index once per file
|
||||||
|
> because restagePointerFile uses the git queue, and unlock
|
||||||
> also queues a git add or something, so the queue isn't able to build
|
> also queues a git add or something, so the queue isn't able to build
|
||||||
> up because two dissimilar things are being queued. This seems an
|
> up because two dissimilar things are being queued. This seems an
|
||||||
> unnecessary behavior; it could queue up all the git adds and then
|
> unnecessary behavior; it could queue up all the git adds and then
|
||||||
> run restagePointerFile after them all.
|
> run restagePointerFile after them all. Implemented that, and [[done]]! --[[Joey]]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue