git-annex/Annex/HashObject.hs
Joey Hess d905232842
use ResourcePool for hash-object handles
Avoid starting an unncessary number of git hash-object processes when
concurrency is enabled.

Sponsored-by: Dartmouth College's DANDI project
2022-07-25 17:32:39 -04:00

66 lines
2.1 KiB
Haskell

{- git hash-object interface
-
- Copyright 2016-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.HashObject (
hashFile,
hashBlob,
hashObjectStop,
mkConcurrentHashObjectHandle,
withHashObjectHandle,
) where
import Annex.Common
import qualified Git.HashObject
import qualified Annex
import Git.Types
import Utility.ResourcePool
import Types.Concurrency
import Annex.Concurrent.Utility
hashObjectStop :: Annex ()
hashObjectStop = maybe noop stop =<< Annex.getState Annex.hashobjecthandle
where
stop p = do
liftIO $ freeResourcePool p Git.HashObject.hashObjectStop
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing }
hashFile :: RawFilePath -> Annex Sha
hashFile f = withHashObjectHandle $ \h ->
liftIO $ Git.HashObject.hashFile h f
{- Note that the content will be written to a temp file.
- So it may be faster to use Git.HashObject.hashObject for large
- blob contents. -}
hashBlob :: Git.HashObject.HashableBlob b => b -> Annex Sha
hashBlob content = withHashObjectHandle $ \h ->
liftIO $ Git.HashObject.hashBlob h content
withHashObjectHandle :: (Git.HashObject.HashObjectHandle -> Annex a) -> Annex a
withHashObjectHandle a =
maybe mkpool go =<< Annex.getState Annex.hashobjecthandle
where
go p = withResourcePool p start a
start = inRepo $ Git.HashObject.hashObjectStart True
mkpool = do
-- This only runs in non-concurrent code paths;
-- a concurrent pool is set up earlier when needed.
p <- mkResourcePoolNonConcurrent start
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just p }
go p
mkConcurrentHashObjectHandle :: Concurrency -> Annex (ResourcePool Git.HashObject.HashObjectHandle)
mkConcurrentHashObjectHandle c =
Annex.getState Annex.hashobjecthandle >>= \case
Just p@(ResourcePool {}) -> return p
_ -> mkResourcePool =<< liftIO (maxHashObjects c)
{- git hash-object is typically CPU bound, and is not likely to be the main
- bottleneck for any command. So limit to the number of CPU cores, maximum,
- while respecting the -Jn value.
-}
maxHashObjects :: Concurrency -> IO Int
maxHashObjects = concurrencyUpToCpus