Sped up git-annex add in direct mode and v6 by using git hash-object --batch.

Speeds up hashSymlink and hashPointerFile.
This commit is contained in:
Joey Hess 2016-03-14 15:58:46 -04:00
parent f2772f469a
commit 88a4a6f396
Failed to extract signature
6 changed files with 25 additions and 33 deletions

View file

@ -42,6 +42,7 @@ import qualified Git
import qualified Git.Config
import Annex.Fixup
import Git.CatFile
import Git.HashObject
import Git.CheckAttr
import Git.CheckIgnore
import qualified Git.Hook
@ -106,6 +107,7 @@ data AnnexState = AnnexState
, branchstate :: BranchState
, repoqueue :: Maybe Git.Queue.Queue
, catfilehandles :: M.Map FilePath CatFileHandle
, hashobjecthandle :: Maybe HashObjectHandle
, checkattrhandle :: Maybe CheckAttrHandle
, checkignorehandle :: Maybe (Maybe CheckIgnoreHandle)
, forcebackend :: Maybe String
@ -151,6 +153,7 @@ newState c r = AnnexState
, branchstate = startBranchState
, repoqueue = Nothing
, catfilehandles = M.empty
, hashobjecthandle = Nothing
, checkattrhandle = Nothing
, checkignorehandle = Nothing
, forcebackend = Nothing

View file

@ -11,6 +11,7 @@ import Annex.Common
import Annex
import Annex.CatFile
import Annex.CheckAttr
import Annex.HashObject
import Annex.CheckIgnore
import qualified Annex.Queue
@ -64,4 +65,5 @@ mergeState st = do
closehandles = do
catFileStop
checkAttrStop
hashObjectStop
checkIgnoreStop

View file

@ -12,38 +12,31 @@ module Annex.HashObject (
hashObjectStop,
) where
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
import System.PosixCompat.Types
import Annex.Common
import qualified Git
import qualified Git.HashObject
import qualified Annex
import Git.Types
import Git.FilePath
import qualified Git.Ref
import Annex.Link
{- Returns the HashObjectHandle stored in the Annex state's
 - hashobjecthandle field. When the field is Nothing, a handle is started
 - first, stored in the state, and then returned. -}
hashObjectHandle :: Annex Git.HashObject.HashObjectHandle
hashObjectHandle = maybe startup return =<< Annex.getState Annex.hashobjecthandle
where
startup = do
-- NOTE(review): this line and the next appear to be the removed and added
-- versions of the same statement in this diff view; only the second one
-- binds h, which the following lines use -- confirm.
inRepo $ Git.hashObjectStart
h <- inRepo $ Git.HashObject.hashObjectStart
-- Store the new handle in the state so later calls find it.
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just h }
return h
{- Stops the handle stored in the Annex state's hashobjecthandle field,
 - if there is one, and resets the field to Nothing. Does nothing (noop)
 - when no handle is stored. -}
hashObjectStop :: Annex ()
-- NOTE(review): the next two equations appear to be the removed and added
-- versions of the same line in this diff view; the second reads the field
-- through Annex.getState -- confirm.
hashObjectStop = maybe noop stop =<< Annex.hashobjecthandle
hashObjectStop = maybe noop stop =<< Annex.getState Annex.hashobjecthandle
where
stop h = do
-- NOTE(review): likewise an old/new pair; they differ only in the module
-- qualifier (Git vs Git.HashObject) -- confirm.
liftIO $ Git.hashObjectStop h
liftIO $ Git.HashObject.hashObjectStop h
-- Clear the stored handle.
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing }
return ()
{- Obtains the handle from hashObjectHandle and passes the file to
 - Git.HashObject.hashFile, returning the resulting Sha. -}
hashFile :: FilePath -> Annex Sha
hashFile f = do
h <- hashObjectHandle
-- NOTE(review): the next two lines appear to be the removed and added
-- versions of the same call in this diff view; the second wraps the call
-- in liftIO -- confirm.
Git.HashObject.hashFile h f
liftIO $ Git.HashObject.hashFile h f
{- Note that the content will be written to a temp file.
- So it may be faster to use Git.HashObject.hashObject for large
@ -51,4 +44,4 @@ hashFile f = do
{- Obtains the handle from hashObjectHandle and hashes the given String
 - content with it, returning the resulting Sha. -}
hashBlob :: String -> Annex Sha
hashBlob content = do
h <- hashObjectHandle
-- NOTE(review): the next two lines appear to be the removed and added
-- versions of the same call in this diff view. The first passes the
-- content to hashFile; the second passes it to hashBlob via liftIO --
-- confirm.
Git.HashObject.hashFile h content
liftIO $ Git.HashObject.hashBlob h content

View file

@ -18,11 +18,11 @@ module Annex.Link where
import Annex.Common
import qualified Annex
import qualified Git.HashObject
import qualified Git.UpdateIndex
import qualified Annex.Queue
import Git.Types
import Git.FilePath
import Annex.HashObject
import qualified Data.ByteString.Lazy as L
import Data.Int
@ -105,12 +105,7 @@ addAnnexLink linktarget file = do
{- Injects a symlink target into git, returning its Sha.
 -
 - The link target is converted with toInternalGitPath before it is
 - hashed.
 -
 - NOTE(review): this span is a diff view that shows code from both sides
 - of the commit: a hashSymlink equation that uses inRepo with
 - Git.HashObject.hashObject, a hashSymlink' variant that takes an
 - explicit HashObjectHandle, and a hashSymlink equation that calls
 - hashBlob. Presumably only the final hashBlob equation remains after
 - the commit -- confirm.
 -}
hashSymlink :: LinkTarget -> Annex Sha
hashSymlink linktarget = inRepo $ Git.HashObject.hashObject BlobObject $
toInternalGitPath linktarget
hashSymlink' :: Git.HashObject.HashObjectHandle -> LinkTarget -> Annex Sha
hashSymlink' h linktarget = liftIO $ Git.HashObject.hashBlob h $
toInternalGitPath linktarget
hashSymlink linktarget = hashBlob (toInternalGitPath linktarget)
{- Stages a symlink to an annexed object, using a Sha of its target. -}
stageSymlink :: FilePath -> Sha -> Annex ()
@ -120,8 +115,7 @@ stageSymlink file sha =
{- Injects a pointer file content into git, returning its Sha.
 -
 - The content that is hashed is the result of formatPointer on the key.
 -
 - NOTE(review): this span is a diff view that shows two equations for
 - hashPointerFile: one using inRepo with Git.HashObject.hashObject, and
 - one calling hashBlob. Presumably only the hashBlob equation remains
 - after the commit -- confirm.
 -}
hashPointerFile :: Key -> Annex Sha
hashPointerFile key = inRepo $ Git.HashObject.hashObject BlobObject $
formatPointer key
hashPointerFile key = hashBlob (formatPointer key)
{- Stages a pointer file, using a Sha of its content -}
stagePointerFile :: FilePath -> Sha -> Annex ()

View file

@ -19,7 +19,7 @@ import qualified Git.LsFiles
import qualified Git.Ref
import Git.UpdateIndex
import Git.Sha
import Git.HashObject
import Annex.HashObject
import Git.Types
import Git.FilePath
import Annex.WorkTree
@ -340,38 +340,36 @@ applyView' mkviewedfile getfilemetadata view = do
(l, clean) <- inRepo $ Git.LsFiles.inRepo [top]
liftIO . nukeFile =<< fromRepo gitAnnexViewIndex
uh <- withViewIndex $ inRepo Git.UpdateIndex.startUpdateIndex
hasher <- inRepo hashObjectStart
forM_ l $ \f -> do
relf <- getTopFilePath <$> inRepo (toTopFilePath f)
go uh hasher relf =<< lookupFile f
go uh relf =<< lookupFile f
liftIO $ do
hashObjectStop hasher
void $ stopUpdateIndex uh
void clean
genViewBranch view
where
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
go uh hasher f (Just k) = do
go uh f (Just k) = do
metadata <- getCurrentMetaData k
let metadata' = getfilemetadata f `unionMetaData` metadata
forM_ (genviewedfiles f metadata') $ \fv -> do
f' <- fromRepo $ fromTopFilePath $ asTopFilePath fv
stagesymlink uh hasher f' =<< calcRepo (gitAnnexLink f' k)
go uh hasher f Nothing
stagesymlink uh f' =<< calcRepo (gitAnnexLink f' k)
go uh f Nothing
| "." `isPrefixOf` f = do
s <- liftIO $ getSymbolicLinkStatus f
if isSymbolicLink s
then stagesymlink uh hasher f =<< liftIO (readSymbolicLink f)
then stagesymlink uh f =<< liftIO (readSymbolicLink f)
else do
sha <- liftIO $ Git.HashObject.hashFile hasher f
sha <- hashFile f
let blobtype = if isExecutable (fileMode s)
then ExecutableBlob
else FileBlob
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
=<< inRepo (Git.UpdateIndex.stageFile sha blobtype f)
| otherwise = noop
stagesymlink uh hasher f linktarget = do
sha <- hashSymlink' hasher linktarget
stagesymlink uh f linktarget = do
sha <- hashSymlink linktarget
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
=<< inRepo (Git.UpdateIndex.stageSymlink f sha)

2
debian/changelog vendored
View file

@ -13,6 +13,8 @@ git-annex (6.20160230) UNRELEASED; urgency=medium
lost in last release.
* Always try to thaw content, even when annex.crippledfilesystem is set.
* Correct git-annex info to include unlocked files in v6 repository.
* Sped up git-annex add in direct mode and v6 by using
git hash-object --batch.
-- Joey Hess <id@joeyh.name> Mon, 29 Feb 2016 13:00:30 -0400