Sped up git-annex add in direct mode and v6 by using git hash-object --batch.
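This speeds up hashSymlink and hashPointerFile, which now go through a shared hash-object handle kept in the Annex state instead of starting a new git process for each call.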
This commit is contained in:
parent
f2772f469a
commit
88a4a6f396
6 changed files with 25 additions and 33 deletions
Annex.hs (3 lines changed)
|
@ -42,6 +42,7 @@ import qualified Git
|
|||
import qualified Git.Config
|
||||
import Annex.Fixup
|
||||
import Git.CatFile
|
||||
import Git.HashObject
|
||||
import Git.CheckAttr
|
||||
import Git.CheckIgnore
|
||||
import qualified Git.Hook
|
||||
|
@ -106,6 +107,7 @@ data AnnexState = AnnexState
|
|||
, branchstate :: BranchState
|
||||
, repoqueue :: Maybe Git.Queue.Queue
|
||||
, catfilehandles :: M.Map FilePath CatFileHandle
|
||||
, hashobjecthandle :: Maybe HashObjectHandle
|
||||
, checkattrhandle :: Maybe CheckAttrHandle
|
||||
, checkignorehandle :: Maybe (Maybe CheckIgnoreHandle)
|
||||
, forcebackend :: Maybe String
|
||||
|
@ -151,6 +153,7 @@ newState c r = AnnexState
|
|||
, branchstate = startBranchState
|
||||
, repoqueue = Nothing
|
||||
, catfilehandles = M.empty
|
||||
, hashobjecthandle = Nothing
|
||||
, checkattrhandle = Nothing
|
||||
, checkignorehandle = Nothing
|
||||
, forcebackend = Nothing
|
||||
|
|
|
@ -11,6 +11,7 @@ import Annex.Common
|
|||
import Annex
|
||||
import Annex.CatFile
|
||||
import Annex.CheckAttr
|
||||
import Annex.HashObject
|
||||
import Annex.CheckIgnore
|
||||
import qualified Annex.Queue
|
||||
|
||||
|
@ -64,4 +65,5 @@ mergeState st = do
|
|||
closehandles = do
|
||||
catFileStop
|
||||
checkAttrStop
|
||||
hashObjectStop
|
||||
checkIgnoreStop
|
||||
|
|
|
@ -12,38 +12,31 @@ module Annex.HashObject (
|
|||
hashObjectStop,
|
||||
) where
|
||||
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified Data.Map as M
|
||||
import System.PosixCompat.Types
|
||||
|
||||
import Annex.Common
|
||||
import qualified Git
|
||||
import qualified Git.HashObject
|
||||
import qualified Annex
|
||||
import Git.Types
|
||||
import Git.FilePath
|
||||
import qualified Git.Ref
|
||||
import Annex.Link
|
||||
|
||||
hashObjectHandle :: Annex Git.HashObject.HashObjectHandle
|
||||
hashObjectHandle = maybe startup return =<< Annex.getState Annex.hashobjecthandle
|
||||
where
|
||||
startup = do
|
||||
inRepo $ Git.hashObjectStart
|
||||
h <- inRepo $ Git.HashObject.hashObjectStart
|
||||
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just h }
|
||||
return h
|
||||
|
||||
hashObjectStop :: Annex ()
|
||||
hashObjectStop = maybe noop stop =<< Annex.hashobjecthandle
|
||||
hashObjectStop = maybe noop stop =<< Annex.getState Annex.hashobjecthandle
|
||||
where
|
||||
stop h = do
|
||||
liftIO $ Git.hashObjectStop h
|
||||
liftIO $ Git.HashObject.hashObjectStop h
|
||||
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing }
|
||||
return ()
|
||||
|
||||
hashFile :: FilePath -> Annex Sha
|
||||
hashFile f = do
|
||||
h <- hashObjectHandle
|
||||
Git.HashObject.hashFile h f
|
||||
liftIO $ Git.HashObject.hashFile h f
|
||||
|
||||
{- Note that the content will be written to a temp file.
|
||||
- So it may be faster to use Git.HashObject.hashObject for large
|
||||
|
@ -51,4 +44,4 @@ hashFile f = do
|
|||
hashBlob :: String -> Annex Sha
|
||||
hashBlob content = do
|
||||
h <- hashObjectHandle
|
||||
Git.HashObject.hashFile h content
|
||||
liftIO $ Git.HashObject.hashBlob h content
|
||||
|
|
|
@ -18,11 +18,11 @@ module Annex.Link where
|
|||
|
||||
import Annex.Common
|
||||
import qualified Annex
|
||||
import qualified Git.HashObject
|
||||
import qualified Git.UpdateIndex
|
||||
import qualified Annex.Queue
|
||||
import Git.Types
|
||||
import Git.FilePath
|
||||
import Annex.HashObject
|
||||
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import Data.Int
|
||||
|
@ -105,12 +105,7 @@ addAnnexLink linktarget file = do
|
|||
|
||||
{- Injects a symlink target into git, returning its Sha. -}
|
||||
hashSymlink :: LinkTarget -> Annex Sha
|
||||
hashSymlink linktarget = inRepo $ Git.HashObject.hashObject BlobObject $
|
||||
toInternalGitPath linktarget
|
||||
|
||||
hashSymlink' :: Git.HashObject.HashObjectHandle -> LinkTarget -> Annex Sha
|
||||
hashSymlink' h linktarget = liftIO $ Git.HashObject.hashBlob h $
|
||||
toInternalGitPath linktarget
|
||||
hashSymlink linktarget = hashBlob (toInternalGitPath linktarget)
|
||||
|
||||
{- Stages a symlink to an annexed object, using a Sha of its target. -}
|
||||
stageSymlink :: FilePath -> Sha -> Annex ()
|
||||
|
@ -120,8 +115,7 @@ stageSymlink file sha =
|
|||
|
||||
{- Injects a pointer file content into git, returning its Sha. -}
|
||||
hashPointerFile :: Key -> Annex Sha
|
||||
hashPointerFile key = inRepo $ Git.HashObject.hashObject BlobObject $
|
||||
formatPointer key
|
||||
hashPointerFile key = hashBlob (formatPointer key)
|
||||
|
||||
{- Stages a pointer file, using a Sha of its content -}
|
||||
stagePointerFile :: FilePath -> Sha -> Annex ()
|
||||
|
|
|
@ -19,7 +19,7 @@ import qualified Git.LsFiles
|
|||
import qualified Git.Ref
|
||||
import Git.UpdateIndex
|
||||
import Git.Sha
|
||||
import Git.HashObject
|
||||
import Annex.HashObject
|
||||
import Git.Types
|
||||
import Git.FilePath
|
||||
import Annex.WorkTree
|
||||
|
@ -340,38 +340,36 @@ applyView' mkviewedfile getfilemetadata view = do
|
|||
(l, clean) <- inRepo $ Git.LsFiles.inRepo [top]
|
||||
liftIO . nukeFile =<< fromRepo gitAnnexViewIndex
|
||||
uh <- withViewIndex $ inRepo Git.UpdateIndex.startUpdateIndex
|
||||
hasher <- inRepo hashObjectStart
|
||||
forM_ l $ \f -> do
|
||||
relf <- getTopFilePath <$> inRepo (toTopFilePath f)
|
||||
go uh hasher relf =<< lookupFile f
|
||||
go uh relf =<< lookupFile f
|
||||
liftIO $ do
|
||||
hashObjectStop hasher
|
||||
void $ stopUpdateIndex uh
|
||||
void clean
|
||||
genViewBranch view
|
||||
where
|
||||
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
|
||||
go uh hasher f (Just k) = do
|
||||
go uh f (Just k) = do
|
||||
metadata <- getCurrentMetaData k
|
||||
let metadata' = getfilemetadata f `unionMetaData` metadata
|
||||
forM_ (genviewedfiles f metadata') $ \fv -> do
|
||||
f' <- fromRepo $ fromTopFilePath $ asTopFilePath fv
|
||||
stagesymlink uh hasher f' =<< calcRepo (gitAnnexLink f' k)
|
||||
go uh hasher f Nothing
|
||||
stagesymlink uh f' =<< calcRepo (gitAnnexLink f' k)
|
||||
go uh f Nothing
|
||||
| "." `isPrefixOf` f = do
|
||||
s <- liftIO $ getSymbolicLinkStatus f
|
||||
if isSymbolicLink s
|
||||
then stagesymlink uh hasher f =<< liftIO (readSymbolicLink f)
|
||||
then stagesymlink uh f =<< liftIO (readSymbolicLink f)
|
||||
else do
|
||||
sha <- liftIO $ Git.HashObject.hashFile hasher f
|
||||
sha <- hashFile f
|
||||
let blobtype = if isExecutable (fileMode s)
|
||||
then ExecutableBlob
|
||||
else FileBlob
|
||||
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
|
||||
=<< inRepo (Git.UpdateIndex.stageFile sha blobtype f)
|
||||
| otherwise = noop
|
||||
stagesymlink uh hasher f linktarget = do
|
||||
sha <- hashSymlink' hasher linktarget
|
||||
stagesymlink uh f linktarget = do
|
||||
sha <- hashSymlink linktarget
|
||||
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
|
||||
=<< inRepo (Git.UpdateIndex.stageSymlink f sha)
|
||||
|
||||
|
|
debian/changelog (vendored, 2 lines changed)
|
@ -13,6 +13,8 @@ git-annex (6.20160230) UNRELEASED; urgency=medium
|
|||
lost in last release.
|
||||
* Always try to thaw content, even when annex.crippledfilesystem is set.
|
||||
* Correct git-annex info to include unlocked files in v6 repository.
|
||||
* Sped up git-annex add in direct mode and v6 by using
|
||||
git hash-object --batch.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Mon, 29 Feb 2016 13:00:30 -0400
|
||||
|
||||
|
|
Loading…
Reference in a new issue