git-annex/Annex/HashObject.hs

67 lines
2.1 KiB
Haskell
Raw Permalink Normal View History

{- git hash-object interface
2016-03-14 19:54:46 +00:00
-
- Copyright 2016-2022 Joey Hess <id@joeyh.name>
2016-03-14 19:54:46 +00:00
-
- Licensed under the GNU AGPL version 3 or higher.
2016-03-14 19:54:46 +00:00
-}
module Annex.HashObject (
hashFile,
hashBlob,
hashObjectStop,
mkConcurrentHashObjectHandle,
withHashObjectHandle,
2016-03-14 19:54:46 +00:00
) where
import Annex.Common
import qualified Git.HashObject
import qualified Annex
import Git.Types
import Utility.ResourcePool
import Types.Concurrency
import Annex.Concurrent.Utility
2016-03-14 19:54:46 +00:00
hashObjectStop :: Annex ()
hashObjectStop = maybe noop stop =<< Annex.getState Annex.hashobjecthandle
2016-03-14 19:54:46 +00:00
where
stop p = do
liftIO $ freeResourcePool p Git.HashObject.hashObjectStop
2016-03-14 19:54:46 +00:00
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing }
hashFile :: RawFilePath -> Annex Sha
hashFile f = withHashObjectHandle $ \h ->
liftIO $ Git.HashObject.hashFile h f
2016-03-14 19:54:46 +00:00
{- Note that the content will be written to a temp file.
- So it may be faster to use Git.HashObject.hashObject for large
- blob contents. -}
hashBlob :: Git.HashObject.HashableBlob b => b -> Annex Sha
hashBlob content = withHashObjectHandle $ \h ->
liftIO $ Git.HashObject.hashBlob h content
withHashObjectHandle :: (Git.HashObject.HashObjectHandle -> Annex a) -> Annex a
withHashObjectHandle a =
maybe mkpool go =<< Annex.getState Annex.hashobjecthandle
where
go p = withResourcePool p start a
start = inRepo $ Git.HashObject.hashObjectStart True
mkpool = do
-- This only runs in non-concurrent code paths;
-- a concurrent pool is set up earlier when needed.
p <- mkResourcePoolNonConcurrent start
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just p }
go p
mkConcurrentHashObjectHandle :: Concurrency -> Annex (ResourcePool Git.HashObject.HashObjectHandle)
mkConcurrentHashObjectHandle c =
Annex.getState Annex.hashobjecthandle >>= \case
Just p@(ResourcePool {}) -> return p
_ -> mkResourcePool =<< liftIO (maxHashObjects c)
{- git hash-object is typically CPU bound, and is not likely to be the main
- bottleneck for any command. So limit to the number of CPU cores, maximum,
- while respecting the -Jn value.
-}
maxHashObjects :: Concurrency -> IO Int
maxHashObjects = concurrencyUpToCpus