Sped up git-annex add in direct mode and v6 by using git hash-object --batch.
Speeds up hashSymlink and hashPointerFile.
This commit is contained in:
parent
f2772f469a
commit
88a4a6f396
6 changed files with 25 additions and 33 deletions
3
Annex.hs
3
Annex.hs
|
@ -42,6 +42,7 @@ import qualified Git
|
||||||
import qualified Git.Config
|
import qualified Git.Config
|
||||||
import Annex.Fixup
|
import Annex.Fixup
|
||||||
import Git.CatFile
|
import Git.CatFile
|
||||||
|
import Git.HashObject
|
||||||
import Git.CheckAttr
|
import Git.CheckAttr
|
||||||
import Git.CheckIgnore
|
import Git.CheckIgnore
|
||||||
import qualified Git.Hook
|
import qualified Git.Hook
|
||||||
|
@ -106,6 +107,7 @@ data AnnexState = AnnexState
|
||||||
, branchstate :: BranchState
|
, branchstate :: BranchState
|
||||||
, repoqueue :: Maybe Git.Queue.Queue
|
, repoqueue :: Maybe Git.Queue.Queue
|
||||||
, catfilehandles :: M.Map FilePath CatFileHandle
|
, catfilehandles :: M.Map FilePath CatFileHandle
|
||||||
|
, hashobjecthandle :: Maybe HashObjectHandle
|
||||||
, checkattrhandle :: Maybe CheckAttrHandle
|
, checkattrhandle :: Maybe CheckAttrHandle
|
||||||
, checkignorehandle :: Maybe (Maybe CheckIgnoreHandle)
|
, checkignorehandle :: Maybe (Maybe CheckIgnoreHandle)
|
||||||
, forcebackend :: Maybe String
|
, forcebackend :: Maybe String
|
||||||
|
@ -151,6 +153,7 @@ newState c r = AnnexState
|
||||||
, branchstate = startBranchState
|
, branchstate = startBranchState
|
||||||
, repoqueue = Nothing
|
, repoqueue = Nothing
|
||||||
, catfilehandles = M.empty
|
, catfilehandles = M.empty
|
||||||
|
, hashobjecthandle = Nothing
|
||||||
, checkattrhandle = Nothing
|
, checkattrhandle = Nothing
|
||||||
, checkignorehandle = Nothing
|
, checkignorehandle = Nothing
|
||||||
, forcebackend = Nothing
|
, forcebackend = Nothing
|
||||||
|
|
|
@ -11,6 +11,7 @@ import Annex.Common
|
||||||
import Annex
|
import Annex
|
||||||
import Annex.CatFile
|
import Annex.CatFile
|
||||||
import Annex.CheckAttr
|
import Annex.CheckAttr
|
||||||
|
import Annex.HashObject
|
||||||
import Annex.CheckIgnore
|
import Annex.CheckIgnore
|
||||||
import qualified Annex.Queue
|
import qualified Annex.Queue
|
||||||
|
|
||||||
|
@ -64,4 +65,5 @@ mergeState st = do
|
||||||
closehandles = do
|
closehandles = do
|
||||||
catFileStop
|
catFileStop
|
||||||
checkAttrStop
|
checkAttrStop
|
||||||
|
hashObjectStop
|
||||||
checkIgnoreStop
|
checkIgnoreStop
|
||||||
|
|
|
@ -12,38 +12,31 @@ module Annex.HashObject (
|
||||||
hashObjectStop,
|
hashObjectStop,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import qualified Data.ByteString.Lazy as L
|
|
||||||
import qualified Data.Map as M
|
|
||||||
import System.PosixCompat.Types
|
|
||||||
|
|
||||||
import Annex.Common
|
import Annex.Common
|
||||||
import qualified Git
|
|
||||||
import qualified Git.HashObject
|
import qualified Git.HashObject
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import Git.Types
|
import Git.Types
|
||||||
import Git.FilePath
|
|
||||||
import qualified Git.Ref
|
|
||||||
import Annex.Link
|
|
||||||
|
|
||||||
hashObjectHandle :: Annex Git.HashObject.HashObjectHandle
|
hashObjectHandle :: Annex Git.HashObject.HashObjectHandle
|
||||||
hashObjectHandle = maybe startup return =<< Annex.getState Annex.hashobjecthandle
|
hashObjectHandle = maybe startup return =<< Annex.getState Annex.hashobjecthandle
|
||||||
where
|
where
|
||||||
startup = do
|
startup = do
|
||||||
inRepo $ Git.hashObjectStart
|
h <- inRepo $ Git.HashObject.hashObjectStart
|
||||||
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just h }
|
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just h }
|
||||||
return h
|
return h
|
||||||
|
|
||||||
hashObjectStop :: Annex ()
|
hashObjectStop :: Annex ()
|
||||||
hashObjectStop = maybe noop stop =<< Annex.hashobjecthandle
|
hashObjectStop = maybe noop stop =<< Annex.getState Annex.hashobjecthandle
|
||||||
where
|
where
|
||||||
stop h = do
|
stop h = do
|
||||||
liftIO $ Git.hashObjectStop h
|
liftIO $ Git.HashObject.hashObjectStop h
|
||||||
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing }
|
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing }
|
||||||
|
return ()
|
||||||
|
|
||||||
hashFile :: FilePath -> Annex Sha
|
hashFile :: FilePath -> Annex Sha
|
||||||
hashFile f = do
|
hashFile f = do
|
||||||
h <- hashObjectHandle
|
h <- hashObjectHandle
|
||||||
Git.HashObject.hashFile h f
|
liftIO $ Git.HashObject.hashFile h f
|
||||||
|
|
||||||
{- Note that the content will be written to a temp file.
|
{- Note that the content will be written to a temp file.
|
||||||
- So it may be faster to use Git.HashObject.hashObject for large
|
- So it may be faster to use Git.HashObject.hashObject for large
|
||||||
|
@ -51,4 +44,4 @@ hashFile f = do
|
||||||
hashBlob :: String -> Annex Sha
|
hashBlob :: String -> Annex Sha
|
||||||
hashBlob content = do
|
hashBlob content = do
|
||||||
h <- hashObjectHandle
|
h <- hashObjectHandle
|
||||||
Git.HashObject.hashFile h content
|
liftIO $ Git.HashObject.hashBlob h content
|
||||||
|
|
|
@ -18,11 +18,11 @@ module Annex.Link where
|
||||||
|
|
||||||
import Annex.Common
|
import Annex.Common
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import qualified Git.HashObject
|
|
||||||
import qualified Git.UpdateIndex
|
import qualified Git.UpdateIndex
|
||||||
import qualified Annex.Queue
|
import qualified Annex.Queue
|
||||||
import Git.Types
|
import Git.Types
|
||||||
import Git.FilePath
|
import Git.FilePath
|
||||||
|
import Annex.HashObject
|
||||||
|
|
||||||
import qualified Data.ByteString.Lazy as L
|
import qualified Data.ByteString.Lazy as L
|
||||||
import Data.Int
|
import Data.Int
|
||||||
|
@ -105,12 +105,7 @@ addAnnexLink linktarget file = do
|
||||||
|
|
||||||
{- Injects a symlink target into git, returning its Sha. -}
|
{- Injects a symlink target into git, returning its Sha. -}
|
||||||
hashSymlink :: LinkTarget -> Annex Sha
|
hashSymlink :: LinkTarget -> Annex Sha
|
||||||
hashSymlink linktarget = inRepo $ Git.HashObject.hashObject BlobObject $
|
hashSymlink linktarget = hashBlob (toInternalGitPath linktarget)
|
||||||
toInternalGitPath linktarget
|
|
||||||
|
|
||||||
hashSymlink' :: Git.HashObject.HashObjectHandle -> LinkTarget -> Annex Sha
|
|
||||||
hashSymlink' h linktarget = liftIO $ Git.HashObject.hashBlob h $
|
|
||||||
toInternalGitPath linktarget
|
|
||||||
|
|
||||||
{- Stages a symlink to an annexed object, using a Sha of its target. -}
|
{- Stages a symlink to an annexed object, using a Sha of its target. -}
|
||||||
stageSymlink :: FilePath -> Sha -> Annex ()
|
stageSymlink :: FilePath -> Sha -> Annex ()
|
||||||
|
@ -120,8 +115,7 @@ stageSymlink file sha =
|
||||||
|
|
||||||
{- Injects a pointer file content into git, returning its Sha. -}
|
{- Injects a pointer file content into git, returning its Sha. -}
|
||||||
hashPointerFile :: Key -> Annex Sha
|
hashPointerFile :: Key -> Annex Sha
|
||||||
hashPointerFile key = inRepo $ Git.HashObject.hashObject BlobObject $
|
hashPointerFile key = hashBlob (formatPointer key)
|
||||||
formatPointer key
|
|
||||||
|
|
||||||
{- Stages a pointer file, using a Sha of its content -}
|
{- Stages a pointer file, using a Sha of its content -}
|
||||||
stagePointerFile :: FilePath -> Sha -> Annex ()
|
stagePointerFile :: FilePath -> Sha -> Annex ()
|
||||||
|
|
|
@ -19,7 +19,7 @@ import qualified Git.LsFiles
|
||||||
import qualified Git.Ref
|
import qualified Git.Ref
|
||||||
import Git.UpdateIndex
|
import Git.UpdateIndex
|
||||||
import Git.Sha
|
import Git.Sha
|
||||||
import Git.HashObject
|
import Annex.HashObject
|
||||||
import Git.Types
|
import Git.Types
|
||||||
import Git.FilePath
|
import Git.FilePath
|
||||||
import Annex.WorkTree
|
import Annex.WorkTree
|
||||||
|
@ -340,38 +340,36 @@ applyView' mkviewedfile getfilemetadata view = do
|
||||||
(l, clean) <- inRepo $ Git.LsFiles.inRepo [top]
|
(l, clean) <- inRepo $ Git.LsFiles.inRepo [top]
|
||||||
liftIO . nukeFile =<< fromRepo gitAnnexViewIndex
|
liftIO . nukeFile =<< fromRepo gitAnnexViewIndex
|
||||||
uh <- withViewIndex $ inRepo Git.UpdateIndex.startUpdateIndex
|
uh <- withViewIndex $ inRepo Git.UpdateIndex.startUpdateIndex
|
||||||
hasher <- inRepo hashObjectStart
|
|
||||||
forM_ l $ \f -> do
|
forM_ l $ \f -> do
|
||||||
relf <- getTopFilePath <$> inRepo (toTopFilePath f)
|
relf <- getTopFilePath <$> inRepo (toTopFilePath f)
|
||||||
go uh hasher relf =<< lookupFile f
|
go uh relf =<< lookupFile f
|
||||||
liftIO $ do
|
liftIO $ do
|
||||||
hashObjectStop hasher
|
|
||||||
void $ stopUpdateIndex uh
|
void $ stopUpdateIndex uh
|
||||||
void clean
|
void clean
|
||||||
genViewBranch view
|
genViewBranch view
|
||||||
where
|
where
|
||||||
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
|
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
|
||||||
go uh hasher f (Just k) = do
|
go uh f (Just k) = do
|
||||||
metadata <- getCurrentMetaData k
|
metadata <- getCurrentMetaData k
|
||||||
let metadata' = getfilemetadata f `unionMetaData` metadata
|
let metadata' = getfilemetadata f `unionMetaData` metadata
|
||||||
forM_ (genviewedfiles f metadata') $ \fv -> do
|
forM_ (genviewedfiles f metadata') $ \fv -> do
|
||||||
f' <- fromRepo $ fromTopFilePath $ asTopFilePath fv
|
f' <- fromRepo $ fromTopFilePath $ asTopFilePath fv
|
||||||
stagesymlink uh hasher f' =<< calcRepo (gitAnnexLink f' k)
|
stagesymlink uh f' =<< calcRepo (gitAnnexLink f' k)
|
||||||
go uh hasher f Nothing
|
go uh f Nothing
|
||||||
| "." `isPrefixOf` f = do
|
| "." `isPrefixOf` f = do
|
||||||
s <- liftIO $ getSymbolicLinkStatus f
|
s <- liftIO $ getSymbolicLinkStatus f
|
||||||
if isSymbolicLink s
|
if isSymbolicLink s
|
||||||
then stagesymlink uh hasher f =<< liftIO (readSymbolicLink f)
|
then stagesymlink uh f =<< liftIO (readSymbolicLink f)
|
||||||
else do
|
else do
|
||||||
sha <- liftIO $ Git.HashObject.hashFile hasher f
|
sha <- hashFile f
|
||||||
let blobtype = if isExecutable (fileMode s)
|
let blobtype = if isExecutable (fileMode s)
|
||||||
then ExecutableBlob
|
then ExecutableBlob
|
||||||
else FileBlob
|
else FileBlob
|
||||||
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
|
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
|
||||||
=<< inRepo (Git.UpdateIndex.stageFile sha blobtype f)
|
=<< inRepo (Git.UpdateIndex.stageFile sha blobtype f)
|
||||||
| otherwise = noop
|
| otherwise = noop
|
||||||
stagesymlink uh hasher f linktarget = do
|
stagesymlink uh f linktarget = do
|
||||||
sha <- hashSymlink' hasher linktarget
|
sha <- hashSymlink linktarget
|
||||||
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
|
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
|
||||||
=<< inRepo (Git.UpdateIndex.stageSymlink f sha)
|
=<< inRepo (Git.UpdateIndex.stageSymlink f sha)
|
||||||
|
|
||||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -13,6 +13,8 @@ git-annex (6.20160230) UNRELEASED; urgency=medium
|
||||||
lost in last release.
|
lost in last release.
|
||||||
* Always try to thaw content, even when annex.crippledfilesystem is set.
|
* Always try to thaw content, even when annex.crippledfilesystem is set.
|
||||||
* Correct git-annex info to include unlocked files in v6 repository.
|
* Correct git-annex info to include unlocked files in v6 repository.
|
||||||
|
* Sped up git-annex add in direct mode and v6 by using
|
||||||
|
git hash-object --batch.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Mon, 29 Feb 2016 13:00:30 -0400
|
-- Joey Hess <id@joeyh.name> Mon, 29 Feb 2016 13:00:30 -0400
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue