Sped up git-annex add in direct mode and v6 by using git hash-object --batch.

Speeds up hashSymlink and hashPointerFile.
This commit is contained in:
Joey Hess 2016-03-14 15:58:46 -04:00
parent f2772f469a
commit 88a4a6f396
Failed to extract signature
6 changed files with 25 additions and 33 deletions

View file

@ -42,6 +42,7 @@ import qualified Git
import qualified Git.Config import qualified Git.Config
import Annex.Fixup import Annex.Fixup
import Git.CatFile import Git.CatFile
import Git.HashObject
import Git.CheckAttr import Git.CheckAttr
import Git.CheckIgnore import Git.CheckIgnore
import qualified Git.Hook import qualified Git.Hook
@ -106,6 +107,7 @@ data AnnexState = AnnexState
, branchstate :: BranchState , branchstate :: BranchState
, repoqueue :: Maybe Git.Queue.Queue , repoqueue :: Maybe Git.Queue.Queue
, catfilehandles :: M.Map FilePath CatFileHandle , catfilehandles :: M.Map FilePath CatFileHandle
, hashobjecthandle :: Maybe HashObjectHandle
, checkattrhandle :: Maybe CheckAttrHandle , checkattrhandle :: Maybe CheckAttrHandle
, checkignorehandle :: Maybe (Maybe CheckIgnoreHandle) , checkignorehandle :: Maybe (Maybe CheckIgnoreHandle)
, forcebackend :: Maybe String , forcebackend :: Maybe String
@ -151,6 +153,7 @@ newState c r = AnnexState
, branchstate = startBranchState , branchstate = startBranchState
, repoqueue = Nothing , repoqueue = Nothing
, catfilehandles = M.empty , catfilehandles = M.empty
, hashobjecthandle = Nothing
, checkattrhandle = Nothing , checkattrhandle = Nothing
, checkignorehandle = Nothing , checkignorehandle = Nothing
, forcebackend = Nothing , forcebackend = Nothing

View file

@ -11,6 +11,7 @@ import Annex.Common
import Annex import Annex
import Annex.CatFile import Annex.CatFile
import Annex.CheckAttr import Annex.CheckAttr
import Annex.HashObject
import Annex.CheckIgnore import Annex.CheckIgnore
import qualified Annex.Queue import qualified Annex.Queue
@ -64,4 +65,5 @@ mergeState st = do
closehandles = do closehandles = do
catFileStop catFileStop
checkAttrStop checkAttrStop
hashObjectStop
checkIgnoreStop checkIgnoreStop

View file

@ -12,38 +12,31 @@ module Annex.HashObject (
hashObjectStop, hashObjectStop,
) where ) where
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
import System.PosixCompat.Types
import Annex.Common import Annex.Common
import qualified Git
import qualified Git.HashObject import qualified Git.HashObject
import qualified Annex import qualified Annex
import Git.Types import Git.Types
import Git.FilePath
import qualified Git.Ref
import Annex.Link
hashObjectHandle :: Annex Git.HashObject.HashObjectHandle hashObjectHandle :: Annex Git.HashObject.HashObjectHandle
hashObjectHandle = maybe startup return =<< Annex.getState Annex.hashobjecthandle hashObjectHandle = maybe startup return =<< Annex.getState Annex.hashobjecthandle
where where
startup = do startup = do
inRepo $ Git.hashObjectStart h <- inRepo $ Git.HashObject.hashObjectStart
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just h } Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just h }
return h return h
hashObjectStop :: Annex () hashObjectStop :: Annex ()
hashObjectStop = maybe noop stop =<< Annex.hashobjecthandle hashObjectStop = maybe noop stop =<< Annex.getState Annex.hashobjecthandle
where where
stop h = do stop h = do
liftIO $ Git.hashObjectStop h liftIO $ Git.HashObject.hashObjectStop h
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing } Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing }
return ()
hashFile :: FilePath -> Annex Sha hashFile :: FilePath -> Annex Sha
hashFile f = do hashFile f = do
h <- hashObjectHandle h <- hashObjectHandle
Git.HashObject.hashFile h f liftIO $ Git.HashObject.hashFile h f
{- Note that the content will be written to a temp file. {- Note that the content will be written to a temp file.
- So it may be faster to use Git.HashObject.hashObject for large - So it may be faster to use Git.HashObject.hashObject for large
@ -51,4 +44,4 @@ hashFile f = do
hashBlob :: String -> Annex Sha hashBlob :: String -> Annex Sha
hashBlob content = do hashBlob content = do
h <- hashObjectHandle h <- hashObjectHandle
Git.HashObject.hashFile h content liftIO $ Git.HashObject.hashBlob h content

View file

@ -18,11 +18,11 @@ module Annex.Link where
import Annex.Common import Annex.Common
import qualified Annex import qualified Annex
import qualified Git.HashObject
import qualified Git.UpdateIndex import qualified Git.UpdateIndex
import qualified Annex.Queue import qualified Annex.Queue
import Git.Types import Git.Types
import Git.FilePath import Git.FilePath
import Annex.HashObject
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import Data.Int import Data.Int
@ -105,12 +105,7 @@ addAnnexLink linktarget file = do
{- Injects a symlink target into git, returning its Sha. -} {- Injects a symlink target into git, returning its Sha. -}
hashSymlink :: LinkTarget -> Annex Sha hashSymlink :: LinkTarget -> Annex Sha
hashSymlink linktarget = inRepo $ Git.HashObject.hashObject BlobObject $ hashSymlink linktarget = hashBlob (toInternalGitPath linktarget)
toInternalGitPath linktarget
hashSymlink' :: Git.HashObject.HashObjectHandle -> LinkTarget -> Annex Sha
hashSymlink' h linktarget = liftIO $ Git.HashObject.hashBlob h $
toInternalGitPath linktarget
{- Stages a symlink to an annexed object, using a Sha of its target. -} {- Stages a symlink to an annexed object, using a Sha of its target. -}
stageSymlink :: FilePath -> Sha -> Annex () stageSymlink :: FilePath -> Sha -> Annex ()
@ -120,8 +115,7 @@ stageSymlink file sha =
{- Injects a pointer file content into git, returning its Sha. -} {- Injects a pointer file content into git, returning its Sha. -}
hashPointerFile :: Key -> Annex Sha hashPointerFile :: Key -> Annex Sha
hashPointerFile key = inRepo $ Git.HashObject.hashObject BlobObject $ hashPointerFile key = hashBlob (formatPointer key)
formatPointer key
{- Stages a pointer file, using a Sha of its content -} {- Stages a pointer file, using a Sha of its content -}
stagePointerFile :: FilePath -> Sha -> Annex () stagePointerFile :: FilePath -> Sha -> Annex ()

View file

@ -19,7 +19,7 @@ import qualified Git.LsFiles
import qualified Git.Ref import qualified Git.Ref
import Git.UpdateIndex import Git.UpdateIndex
import Git.Sha import Git.Sha
import Git.HashObject import Annex.HashObject
import Git.Types import Git.Types
import Git.FilePath import Git.FilePath
import Annex.WorkTree import Annex.WorkTree
@ -340,38 +340,36 @@ applyView' mkviewedfile getfilemetadata view = do
(l, clean) <- inRepo $ Git.LsFiles.inRepo [top] (l, clean) <- inRepo $ Git.LsFiles.inRepo [top]
liftIO . nukeFile =<< fromRepo gitAnnexViewIndex liftIO . nukeFile =<< fromRepo gitAnnexViewIndex
uh <- withViewIndex $ inRepo Git.UpdateIndex.startUpdateIndex uh <- withViewIndex $ inRepo Git.UpdateIndex.startUpdateIndex
hasher <- inRepo hashObjectStart
forM_ l $ \f -> do forM_ l $ \f -> do
relf <- getTopFilePath <$> inRepo (toTopFilePath f) relf <- getTopFilePath <$> inRepo (toTopFilePath f)
go uh hasher relf =<< lookupFile f go uh relf =<< lookupFile f
liftIO $ do liftIO $ do
hashObjectStop hasher
void $ stopUpdateIndex uh void $ stopUpdateIndex uh
void clean void clean
genViewBranch view genViewBranch view
where where
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
go uh hasher f (Just k) = do go uh f (Just k) = do
metadata <- getCurrentMetaData k metadata <- getCurrentMetaData k
let metadata' = getfilemetadata f `unionMetaData` metadata let metadata' = getfilemetadata f `unionMetaData` metadata
forM_ (genviewedfiles f metadata') $ \fv -> do forM_ (genviewedfiles f metadata') $ \fv -> do
f' <- fromRepo $ fromTopFilePath $ asTopFilePath fv f' <- fromRepo $ fromTopFilePath $ asTopFilePath fv
stagesymlink uh hasher f' =<< calcRepo (gitAnnexLink f' k) stagesymlink uh f' =<< calcRepo (gitAnnexLink f' k)
go uh hasher f Nothing go uh f Nothing
| "." `isPrefixOf` f = do | "." `isPrefixOf` f = do
s <- liftIO $ getSymbolicLinkStatus f s <- liftIO $ getSymbolicLinkStatus f
if isSymbolicLink s if isSymbolicLink s
then stagesymlink uh hasher f =<< liftIO (readSymbolicLink f) then stagesymlink uh f =<< liftIO (readSymbolicLink f)
else do else do
sha <- liftIO $ Git.HashObject.hashFile hasher f sha <- hashFile f
let blobtype = if isExecutable (fileMode s) let blobtype = if isExecutable (fileMode s)
then ExecutableBlob then ExecutableBlob
else FileBlob else FileBlob
liftIO . Git.UpdateIndex.streamUpdateIndex' uh liftIO . Git.UpdateIndex.streamUpdateIndex' uh
=<< inRepo (Git.UpdateIndex.stageFile sha blobtype f) =<< inRepo (Git.UpdateIndex.stageFile sha blobtype f)
| otherwise = noop | otherwise = noop
stagesymlink uh hasher f linktarget = do stagesymlink uh f linktarget = do
sha <- hashSymlink' hasher linktarget sha <- hashSymlink linktarget
liftIO . Git.UpdateIndex.streamUpdateIndex' uh liftIO . Git.UpdateIndex.streamUpdateIndex' uh
=<< inRepo (Git.UpdateIndex.stageSymlink f sha) =<< inRepo (Git.UpdateIndex.stageSymlink f sha)

2
debian/changelog vendored
View file

@ -13,6 +13,8 @@ git-annex (6.20160230) UNRELEASED; urgency=medium
lost in last release. lost in last release.
* Always try to thaw content, even when annex.crippledfilesystem is set. * Always try to thaw content, even when annex.crippledfilesystem is set.
* Correct git-annex info to include unlocked files in v6 repository. * Correct git-annex info to include unlocked files in v6 repository.
* Sped up git-annex add in direct mode and v6 by using
git hash-object --batch.
-- Joey Hess <id@joeyh.name> Mon, 29 Feb 2016 13:00:30 -0400 -- Joey Hess <id@joeyh.name> Mon, 29 Feb 2016 13:00:30 -0400