Sped up git-annex add in direct mode and v6 by using git hash-object --batch.

Speeds up hashSymlink and hashPointerFile.
This commit is contained in:
Joey Hess 2016-03-14 15:58:46 -04:00
parent f2772f469a
commit 88a4a6f396
Failed to extract signature
6 changed files with 25 additions and 33 deletions

View file

@ -11,6 +11,7 @@ import Annex.Common
import Annex
import Annex.CatFile
import Annex.CheckAttr
import Annex.HashObject
import Annex.CheckIgnore
import qualified Annex.Queue
@ -64,4 +65,5 @@ mergeState st = do
closehandles = do
catFileStop
checkAttrStop
hashObjectStop
checkIgnoreStop

View file

@ -12,38 +12,31 @@ module Annex.HashObject (
hashObjectStop,
) where
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
import System.PosixCompat.Types
import Annex.Common
import qualified Git
import qualified Git.HashObject
import qualified Annex
import Git.Types
import Git.FilePath
import qualified Git.Ref
import Annex.Link
{- Returns the hash-object handle held in the Annex state.
 -
 - When the state holds no handle yet, one is started in the repository,
 - stored in the state as Just the new handle, and then returned, so later
 - calls find it in the state.
 -
 - NOTE(review): two start lines are visible below, one naming
 - Git.hashObjectStart and one naming Git.HashObject.hashObjectStart. This
 - looks like the old and new line of a change shown side by side;
 - presumably only the Git.HashObject form is current. Confirm against
 - the repository.
 -}
hashObjectHandle :: Annex Git.HashObject.HashObjectHandle
hashObjectHandle = maybe startup return =<< Annex.getState Annex.hashobjecthandle
where
startup = do
inRepo $ Git.hashObjectStart
h <- inRepo $ Git.HashObject.hashObjectStart
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just h }
return h
{- Stops the hash-object handle held in the Annex state, if there is one,
 - and resets the state field to Nothing. Does nothing (noop) when no
 - handle is stored.
 -
 - NOTE(review): the defining equation and the stop call each appear
 - twice below. One equation reads Annex.hashobjecthandle directly and
 - the other goes through Annex.getState; one stop call names
 - Git.hashObjectStop and the other Git.HashObject.hashObjectStop.
 - This looks like old and new lines of a change shown together;
 - presumably the Annex.getState and Git.HashObject forms are current.
 - Confirm against the repository.
 -}
hashObjectStop :: Annex ()
hashObjectStop = maybe noop stop =<< Annex.hashobjecthandle
hashObjectStop = maybe noop stop =<< Annex.getState Annex.hashobjecthandle
where
stop h = do
liftIO $ Git.hashObjectStop h
liftIO $ Git.HashObject.hashObjectStop h
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing }
return ()
{- Passes the given file path, together with the handle obtained from
 - hashObjectHandle, to Git.HashObject.hashFile and returns the
 - resulting Sha.
 -
 - NOTE(review): the call to Git.HashObject.hashFile appears twice below,
 - once bare and once wrapped in liftIO. This looks like the old and new
 - line of a change shown together; presumably the liftIO form is
 - current. Confirm against the repository.
 -}
hashFile :: FilePath -> Annex Sha
hashFile f = do
h <- hashObjectHandle
Git.HashObject.hashFile h f
liftIO $ Git.HashObject.hashFile h f
{- Note that the content will be written to a temp file.
- So it may be faster to use Git.HashObject.hashObject for large
@ -51,4 +44,4 @@ hashFile f = do
{- Passes the given content string, together with the handle obtained
 - from hashObjectHandle, to the Git.HashObject layer and returns the
 - resulting Sha.
 -
 - NOTE(review): two final lines are visible below. One hands the content
 - to Git.HashObject.hashFile, the same function hashFile gives a file
 - path to; the other hands it to Git.HashObject.hashBlob under liftIO.
 - This looks like the old and new line of a change shown together;
 - presumably the hashBlob form is current. Confirm against the
 - repository.
 -}
hashBlob :: String -> Annex Sha
hashBlob content = do
h <- hashObjectHandle
Git.HashObject.hashFile h content
liftIO $ Git.HashObject.hashBlob h content

View file

@ -18,11 +18,11 @@ module Annex.Link where
import Annex.Common
import qualified Annex
import qualified Git.HashObject
import qualified Git.UpdateIndex
import qualified Annex.Queue
import Git.Types
import Git.FilePath
import Annex.HashObject
import qualified Data.ByteString.Lazy as L
import Data.Int
@ -105,12 +105,7 @@ addAnnexLink linktarget file = do
{- Injects a symlink target into git, returning its Sha.
 -
 - The link target is converted with toInternalGitPath before it is
 - hashed; every form shown below does this.
 -
 - NOTE(review): three forms are visible below. One runs
 - Git.HashObject.hashObject BlobObject through inRepo, one is a helper
 - hashSymlink' that takes an explicit hash-object handle, and one is a
 - one-line definition that calls hashBlob. This looks like old and new
 - lines of a change shown together; presumably the hashBlob one-liner
 - is current and the other two were removed. Confirm against the
 - repository.
 -}
hashSymlink :: LinkTarget -> Annex Sha
hashSymlink linktarget = inRepo $ Git.HashObject.hashObject BlobObject $
toInternalGitPath linktarget
hashSymlink' :: Git.HashObject.HashObjectHandle -> LinkTarget -> Annex Sha
hashSymlink' h linktarget = liftIO $ Git.HashObject.hashBlob h $
toInternalGitPath linktarget
hashSymlink linktarget = hashBlob (toInternalGitPath linktarget)
{- Stages a symlink to an annexed object, using a Sha of its target. -}
stageSymlink :: FilePath -> Sha -> Annex ()
@ -120,8 +115,7 @@ stageSymlink file sha =
{- Injects a pointer file content into git, returning its Sha.
 -
 - The content that gets hashed is formatPointer applied to the key.
 -
 - NOTE(review): two forms are visible below. One runs
 - Git.HashObject.hashObject BlobObject through inRepo, the other is a
 - one-line definition that calls hashBlob. This looks like the old and
 - new lines of a change shown together; presumably the hashBlob
 - one-liner is current. Confirm against the repository.
 -}
hashPointerFile :: Key -> Annex Sha
hashPointerFile key = inRepo $ Git.HashObject.hashObject BlobObject $
formatPointer key
hashPointerFile key = hashBlob (formatPointer key)
{- Stages a pointer file, using a Sha of its content -}
stagePointerFile :: FilePath -> Sha -> Annex ()

View file

@ -19,7 +19,7 @@ import qualified Git.LsFiles
import qualified Git.Ref
import Git.UpdateIndex
import Git.Sha
import Git.HashObject
import Annex.HashObject
import Git.Types
import Git.FilePath
import Annex.WorkTree
@ -340,38 +340,36 @@ applyView' mkviewedfile getfilemetadata view = do
(l, clean) <- inRepo $ Git.LsFiles.inRepo [top]
liftIO . nukeFile =<< fromRepo gitAnnexViewIndex
uh <- withViewIndex $ inRepo Git.UpdateIndex.startUpdateIndex
hasher <- inRepo hashObjectStart
forM_ l $ \f -> do
relf <- getTopFilePath <$> inRepo (toTopFilePath f)
go uh hasher relf =<< lookupFile f
go uh relf =<< lookupFile f
liftIO $ do
hashObjectStop hasher
void $ stopUpdateIndex uh
void clean
genViewBranch view
where
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
go uh hasher f (Just k) = do
go uh f (Just k) = do
metadata <- getCurrentMetaData k
let metadata' = getfilemetadata f `unionMetaData` metadata
forM_ (genviewedfiles f metadata') $ \fv -> do
f' <- fromRepo $ fromTopFilePath $ asTopFilePath fv
stagesymlink uh hasher f' =<< calcRepo (gitAnnexLink f' k)
go uh hasher f Nothing
stagesymlink uh f' =<< calcRepo (gitAnnexLink f' k)
go uh f Nothing
| "." `isPrefixOf` f = do
s <- liftIO $ getSymbolicLinkStatus f
if isSymbolicLink s
then stagesymlink uh hasher f =<< liftIO (readSymbolicLink f)
then stagesymlink uh f =<< liftIO (readSymbolicLink f)
else do
sha <- liftIO $ Git.HashObject.hashFile hasher f
sha <- hashFile f
let blobtype = if isExecutable (fileMode s)
then ExecutableBlob
else FileBlob
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
=<< inRepo (Git.UpdateIndex.stageFile sha blobtype f)
| otherwise = noop
stagesymlink uh hasher f linktarget = do
sha <- hashSymlink' hasher linktarget
stagesymlink uh f linktarget = do
sha <- hashSymlink linktarget
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
=<< inRepo (Git.UpdateIndex.stageSymlink f sha)