use ResourcePool for hash-object handles
Avoid starting an unncessary number of git hash-object processes when concurrency is enabled. Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
parent
b1c49c373a
commit
d905232842
7 changed files with 56 additions and 28 deletions
2
Annex.hs
2
Annex.hs
|
@ -175,7 +175,7 @@ data AnnexState = AnnexState
|
||||||
, branchstate :: BranchState
|
, branchstate :: BranchState
|
||||||
, repoqueue :: Maybe (Git.Queue.Queue Annex)
|
, repoqueue :: Maybe (Git.Queue.Queue Annex)
|
||||||
, catfilehandles :: CatFileHandles
|
, catfilehandles :: CatFileHandles
|
||||||
, hashobjecthandle :: Maybe HashObjectHandle
|
, hashobjecthandle :: Maybe (ResourcePool HashObjectHandle)
|
||||||
, checkattrhandle :: Maybe (ResourcePool CheckAttrHandle)
|
, checkattrhandle :: Maybe (ResourcePool CheckAttrHandle)
|
||||||
, checkignorehandle :: Maybe (ResourcePool CheckIgnoreHandle)
|
, checkignorehandle :: Maybe (ResourcePool CheckIgnoreHandle)
|
||||||
, globalnumcopies :: Maybe NumCopies
|
, globalnumcopies :: Maybe NumCopies
|
||||||
|
|
|
@ -631,7 +631,7 @@ genIndex g = Git.UpdateIndex.streamUpdateIndex g
|
||||||
mergeIndex :: JournalLocked -> [Git.Ref] -> Annex ()
|
mergeIndex :: JournalLocked -> [Git.Ref] -> Annex ()
|
||||||
mergeIndex jl branches = do
|
mergeIndex jl branches = do
|
||||||
prepareModifyIndex jl
|
prepareModifyIndex jl
|
||||||
hashhandle <- hashObjectHandle
|
withHashObjectHandle $ \hashhandle ->
|
||||||
withCatFileHandle $ \ch ->
|
withCatFileHandle $ \ch ->
|
||||||
inRepo $ \g -> Git.UnionMerge.mergeIndex hashhandle ch g branches
|
inRepo $ \g -> Git.UnionMerge.mergeIndex hashhandle ch g branches
|
||||||
|
|
||||||
|
@ -709,7 +709,7 @@ stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do
|
||||||
g <- gitRepo
|
g <- gitRepo
|
||||||
let dir = gitAnnexJournalDir g
|
let dir = gitAnnexJournalDir g
|
||||||
(jlogf, jlogh) <- openjlog (fromRawFilePath tmpdir)
|
(jlogf, jlogh) <- openjlog (fromRawFilePath tmpdir)
|
||||||
h <- hashObjectHandle
|
withHashObjectHandle $ \h ->
|
||||||
withJournalHandle gitAnnexJournalDir $ \jh ->
|
withJournalHandle gitAnnexJournalDir $ \jh ->
|
||||||
Git.UpdateIndex.streamUpdateIndex g
|
Git.UpdateIndex.streamUpdateIndex g
|
||||||
[genstream dir h jh jlogh]
|
[genstream dir h jh jlogh]
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex concurrent state
|
{- git-annex concurrent state
|
||||||
-
|
-
|
||||||
- Copyright 2015-2021 Joey Hess <id@joeyh.name>
|
- Copyright 2015-2022 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -18,6 +18,7 @@ import Types.Concurrency
|
||||||
import Types.CatFileHandles
|
import Types.CatFileHandles
|
||||||
import Annex.CatFile
|
import Annex.CatFile
|
||||||
import Annex.CheckAttr
|
import Annex.CheckAttr
|
||||||
|
import Annex.HashObject
|
||||||
import Annex.CheckIgnore
|
import Annex.CheckIgnore
|
||||||
|
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
@ -47,14 +48,17 @@ setConcurrency' c f = do
|
||||||
fromnonconcurrent = do
|
fromnonconcurrent = do
|
||||||
catFileStop
|
catFileStop
|
||||||
checkAttrStop
|
checkAttrStop
|
||||||
|
hashObjectStop
|
||||||
checkIgnoreStop
|
checkIgnoreStop
|
||||||
cfh <- liftIO catFileHandlesPool
|
cfh <- liftIO catFileHandlesPool
|
||||||
cah <- mkConcurrentCheckAttrHandle c
|
cah <- mkConcurrentCheckAttrHandle c
|
||||||
|
hoh <- mkConcurrentHashObjectHandle c
|
||||||
cih <- mkConcurrentCheckIgnoreHandle c
|
cih <- mkConcurrentCheckIgnoreHandle c
|
||||||
Annex.changeState $ \s -> s
|
Annex.changeState $ \s -> s
|
||||||
{ Annex.concurrency = newc
|
{ Annex.concurrency = newc
|
||||||
, Annex.catfilehandles = cfh
|
, Annex.catfilehandles = cfh
|
||||||
, Annex.checkattrhandle = Just cah
|
, Annex.checkattrhandle = Just cah
|
||||||
|
, Annex.hashobjecthandle = Just hoh
|
||||||
, Annex.checkignorehandle = Just cih
|
, Annex.checkignorehandle = Just cih
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- git hash-object interface, with handle automatically stored in the Annex monad
|
{- git hash-object interface
|
||||||
-
|
-
|
||||||
- Copyright 2016 Joey Hess <id@joeyh.name>
|
- Copyright 2016-2022 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -8,40 +8,59 @@
|
||||||
module Annex.HashObject (
|
module Annex.HashObject (
|
||||||
hashFile,
|
hashFile,
|
||||||
hashBlob,
|
hashBlob,
|
||||||
hashObjectHandle,
|
|
||||||
hashObjectStop,
|
hashObjectStop,
|
||||||
|
mkConcurrentHashObjectHandle,
|
||||||
|
withHashObjectHandle,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Annex.Common
|
import Annex.Common
|
||||||
import qualified Git.HashObject
|
import qualified Git.HashObject
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import Git.Types
|
import Git.Types
|
||||||
|
import Utility.ResourcePool
|
||||||
hashObjectHandle :: Annex Git.HashObject.HashObjectHandle
|
import Types.Concurrency
|
||||||
hashObjectHandle = maybe startup return =<< Annex.getState Annex.hashobjecthandle
|
import Annex.Concurrent.Utility
|
||||||
where
|
|
||||||
startup = do
|
|
||||||
h <- inRepo $ Git.HashObject.hashObjectStart True
|
|
||||||
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just h }
|
|
||||||
return h
|
|
||||||
|
|
||||||
hashObjectStop :: Annex ()
|
hashObjectStop :: Annex ()
|
||||||
hashObjectStop = maybe noop stop =<< Annex.getState Annex.hashobjecthandle
|
hashObjectStop = maybe noop stop =<< Annex.getState Annex.hashobjecthandle
|
||||||
where
|
where
|
||||||
stop h = do
|
stop p = do
|
||||||
liftIO $ Git.HashObject.hashObjectStop h
|
liftIO $ freeResourcePool p Git.HashObject.hashObjectStop
|
||||||
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing }
|
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing }
|
||||||
return ()
|
|
||||||
|
|
||||||
hashFile :: RawFilePath -> Annex Sha
|
hashFile :: RawFilePath -> Annex Sha
|
||||||
hashFile f = do
|
hashFile f = withHashObjectHandle $ \h ->
|
||||||
h <- hashObjectHandle
|
|
||||||
liftIO $ Git.HashObject.hashFile h f
|
liftIO $ Git.HashObject.hashFile h f
|
||||||
|
|
||||||
{- Note that the content will be written to a temp file.
|
{- Note that the content will be written to a temp file.
|
||||||
- So it may be faster to use Git.HashObject.hashObject for large
|
- So it may be faster to use Git.HashObject.hashObject for large
|
||||||
- blob contents. -}
|
- blob contents. -}
|
||||||
hashBlob :: Git.HashObject.HashableBlob b => b -> Annex Sha
|
hashBlob :: Git.HashObject.HashableBlob b => b -> Annex Sha
|
||||||
hashBlob content = do
|
hashBlob content = withHashObjectHandle $ \h ->
|
||||||
h <- hashObjectHandle
|
|
||||||
liftIO $ Git.HashObject.hashBlob h content
|
liftIO $ Git.HashObject.hashBlob h content
|
||||||
|
|
||||||
|
withHashObjectHandle :: (Git.HashObject.HashObjectHandle -> Annex a) -> Annex a
|
||||||
|
withHashObjectHandle a =
|
||||||
|
maybe mkpool go =<< Annex.getState Annex.hashobjecthandle
|
||||||
|
where
|
||||||
|
go p = withResourcePool p start a
|
||||||
|
start = inRepo $ Git.HashObject.hashObjectStart True
|
||||||
|
mkpool = do
|
||||||
|
-- This only runs in non-concurrent code paths;
|
||||||
|
-- a concurrent pool is set up earlier when needed.
|
||||||
|
p <- mkResourcePoolNonConcurrent start
|
||||||
|
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just p }
|
||||||
|
go p
|
||||||
|
|
||||||
|
mkConcurrentHashObjectHandle :: Concurrency -> Annex (ResourcePool Git.HashObject.HashObjectHandle)
|
||||||
|
mkConcurrentHashObjectHandle c =
|
||||||
|
Annex.getState Annex.hashobjecthandle >>= \case
|
||||||
|
Just p@(ResourcePool {}) -> return p
|
||||||
|
_ -> mkResourcePool =<< liftIO (maxHashObjects c)
|
||||||
|
|
||||||
|
{- git hash-object is typically CPU bound, and is not likely to be the main
|
||||||
|
- bottleneck for any command. So limit to the number of CPU cores, maximum,
|
||||||
|
- while respecting the -Jn value.
|
||||||
|
-}
|
||||||
|
maxHashObjects :: Concurrency -> IO Int
|
||||||
|
maxHashObjects = concurrencyUpToCpus
|
||||||
|
|
|
@ -4,6 +4,8 @@ git-annex (10.20220725) UNRELEASED; urgency=medium
|
||||||
automatically upgrade to v10 in a year's time.
|
automatically upgrade to v10 in a year's time.
|
||||||
To avoid this upgrade, you can set annex.autoupgraderepository to false.
|
To avoid this upgrade, you can set annex.autoupgraderepository to false.
|
||||||
* Use v10 by default for new repositories.
|
* Use v10 by default for new repositories.
|
||||||
|
* Avoid starting an unncessary number of git hash-object processes when
|
||||||
|
concurrency is enabled.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Mon, 25 Jul 2022 15:35:45 -0400
|
-- Joey Hess <id@joeyh.name> Mon, 25 Jul 2022 15:35:45 -0400
|
||||||
|
|
||||||
|
|
|
@ -34,7 +34,7 @@ mkResourcePool maxsz = liftIO $
|
||||||
<*> newTVarIO []
|
<*> newTVarIO []
|
||||||
|
|
||||||
{- When there will not be multiple threads that may
|
{- When there will not be multiple threads that may
|
||||||
- may concurrently try to use it, using this is more
|
- concurrently try to use it, using this is more
|
||||||
- efficient than mkResourcePool.
|
- efficient than mkResourcePool.
|
||||||
-}
|
-}
|
||||||
mkResourcePoolNonConcurrent :: (MonadMask m, MonadIO m) => m r -> m (ResourcePool r)
|
mkResourcePoolNonConcurrent :: (MonadMask m, MonadIO m) => m r -> m (ResourcePool r)
|
||||||
|
|
|
@ -43,3 +43,6 @@ which might be ok but still wonder why they are just sleeping there in more than
|
||||||
[[!tag projects/dandi]]
|
[[!tag projects/dandi]]
|
||||||
|
|
||||||
|
|
||||||
|
> [[done]]; this is now handled like other git helper processes
|
||||||
|
> and will be capped to the maximum of the number of jobs or cpu cores,
|
||||||
|
> and in practice usually fewer than that will be started. --[[Joey]]
|
||||||
|
|
Loading…
Reference in a new issue