add: Significantly speed up adding lots of non-large files to git
* add: Significantly speed up adding lots of non-large files to git, by disabling the annex smudge filter when running git add. * add --force-small: Run git add rather than updating the index itself, so that any smudge filters other than the annex one that may be enabled will be used.
This commit is contained in:
parent
0b2b666a38
commit
5ce61c6b2a
6 changed files with 43 additions and 40 deletions
|
@ -20,6 +20,7 @@ import Git.Index
|
||||||
import Git.Env
|
import Git.Env
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import qualified Annex.Queue
|
import qualified Annex.Queue
|
||||||
|
import Config.Smudge
|
||||||
|
|
||||||
{- Runs an action using a different git index file. -}
|
{- Runs an action using a different git index file. -}
|
||||||
withIndexFile :: AltIndexFile -> (FilePath -> Annex a) -> Annex a
|
withIndexFile :: AltIndexFile -> (FilePath -> Annex a) -> Annex a
|
||||||
|
@ -67,16 +68,12 @@ withIndexFile i = withAltRepo usecachedgitenv restoregitenv
|
||||||
- Smudge and clean filters are disabled in this work tree. -}
|
- Smudge and clean filters are disabled in this work tree. -}
|
||||||
withWorkTree :: FilePath -> Annex a -> Annex a
|
withWorkTree :: FilePath -> Annex a -> Annex a
|
||||||
withWorkTree d a = withAltRepo
|
withWorkTree d a = withAltRepo
|
||||||
(\g -> return $ (g { location = modlocation (location g), gitGlobalOpts = gitGlobalOpts g ++ disableSmudgeConfig }, ()))
|
(\g -> return $ (g { location = modlocation (location g), gitGlobalOpts = gitGlobalOpts g ++ bypassSmudgeConfig }, ()))
|
||||||
(\g g' -> g' { location = location g, gitGlobalOpts = gitGlobalOpts g })
|
(\g g' -> g' { location = location g, gitGlobalOpts = gitGlobalOpts g })
|
||||||
(const a)
|
(const a)
|
||||||
where
|
where
|
||||||
modlocation l@(Local {}) = l { worktree = Just (toRawFilePath d) }
|
modlocation l@(Local {}) = l { worktree = Just (toRawFilePath d) }
|
||||||
modlocation _ = error "withWorkTree of non-local git repo"
|
modlocation _ = error "withWorkTree of non-local git repo"
|
||||||
disableSmudgeConfig = map Param
|
|
||||||
[ "-c", "filter.annex.smudge="
|
|
||||||
, "-c", "filter.annex.clean="
|
|
||||||
]
|
|
||||||
|
|
||||||
{- Runs an action with the git index file and HEAD, and a few other
|
{- Runs an action with the git index file and HEAD, and a few other
|
||||||
- files that are related to the work tree coming from an overlay
|
- files that are related to the work tree coming from an overlay
|
||||||
|
|
10
CHANGELOG
10
CHANGELOG
|
@ -1,3 +1,13 @@
|
||||||
|
git-annex (8.20201130) UNRELEASED; urgency=medium
|
||||||
|
|
||||||
|
* add: Significantly speed up adding lots of non-large files to git,
|
||||||
|
by disabling the annex smudge filter when running git add.
|
||||||
|
* add --force-small: Run git add rather than updating the index itself,
|
||||||
|
so any smudge filters other than the annex one that may be enabled will
|
||||||
|
be used.
|
||||||
|
|
||||||
|
-- Joey Hess <id@joeyh.name> Mon, 04 Jan 2021 12:52:41 -0400
|
||||||
|
|
||||||
git-annex (8.20201129) upstream; urgency=medium
|
git-annex (8.20201129) upstream; urgency=medium
|
||||||
|
|
||||||
* New borg special remote. This is a new kind of remote, that examines
|
* New borg special remote. This is a new kind of remote, that examines
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex command
|
{- git-annex command
|
||||||
-
|
-
|
||||||
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
|
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -17,13 +17,10 @@ import qualified Database.Keys
|
||||||
import Annex.FileMatcher
|
import Annex.FileMatcher
|
||||||
import Annex.Link
|
import Annex.Link
|
||||||
import Annex.Tmp
|
import Annex.Tmp
|
||||||
import Annex.HashObject
|
|
||||||
import Messages.Progress
|
import Messages.Progress
|
||||||
import Git.Types
|
|
||||||
import Git.FilePath
|
import Git.FilePath
|
||||||
import Config.GitConfig
|
import Config.GitConfig
|
||||||
import qualified Git.UpdateIndex
|
import Config.Smudge
|
||||||
import Utility.FileMode
|
|
||||||
import Utility.OptParse
|
import Utility.OptParse
|
||||||
import qualified Utility.RawFilePath as R
|
import qualified Utility.RawFilePath as R
|
||||||
|
|
||||||
|
@ -119,37 +116,26 @@ startSmall o si file =
|
||||||
addSmall :: CheckGitIgnore -> RawFilePath -> Annex Bool
|
addSmall :: CheckGitIgnore -> RawFilePath -> Annex Bool
|
||||||
addSmall ci file = do
|
addSmall ci file = do
|
||||||
showNote "non-large file; adding content to git repository"
|
showNote "non-large file; adding content to git repository"
|
||||||
addFile ci file
|
addFile Small ci file
|
||||||
|
|
||||||
startSmallOverridden :: AddOptions -> SeekInput -> RawFilePath -> CommandStart
|
startSmallOverridden :: AddOptions -> SeekInput -> RawFilePath -> CommandStart
|
||||||
startSmallOverridden o si file =
|
startSmallOverridden o si file =
|
||||||
starting "add" (ActionItemWorkTreeFile file) si $
|
starting "add" (ActionItemWorkTreeFile file) si $ next $ do
|
||||||
next $ addSmallOverridden o file
|
|
||||||
|
|
||||||
addSmallOverridden :: AddOptions -> RawFilePath -> Annex Bool
|
|
||||||
addSmallOverridden o file = do
|
|
||||||
showNote "adding content to git repository"
|
showNote "adding content to git repository"
|
||||||
s <- liftIO $ R.getSymbolicLinkStatus file
|
addFile Small (checkGitIgnoreOption o) file
|
||||||
if not (isRegularFile s)
|
|
||||||
then addFile (checkGitIgnoreOption o) file
|
|
||||||
else do
|
|
||||||
-- Can't use addFile because the clean filter will
|
|
||||||
-- honor annex.largefiles and it has been overridden.
|
|
||||||
-- Instead, hash the file and add to the index.
|
|
||||||
sha <- hashFile file
|
|
||||||
let ty = if isExecutable (fileMode s)
|
|
||||||
then TreeExecutable
|
|
||||||
else TreeFile
|
|
||||||
Annex.Queue.addUpdateIndex =<<
|
|
||||||
inRepo (Git.UpdateIndex.stageFile sha ty (fromRawFilePath file))
|
|
||||||
return True
|
|
||||||
|
|
||||||
addFile :: CheckGitIgnore -> RawFilePath -> Annex Bool
|
data SmallOrLarge = Small | Large
|
||||||
addFile ci file = do
|
|
||||||
|
addFile :: SmallOrLarge -> CheckGitIgnore -> RawFilePath -> Annex Bool
|
||||||
|
addFile smallorlarge ci file = do
|
||||||
ps <- gitAddParams ci
|
ps <- gitAddParams ci
|
||||||
Annex.Queue.addCommand [] "add" (ps++[Param "--"])
|
Annex.Queue.addCommand cps "add" (ps++[Param "--"])
|
||||||
[fromRawFilePath file]
|
[fromRawFilePath file]
|
||||||
return True
|
return True
|
||||||
|
where
|
||||||
|
cps = case smallorlarge of
|
||||||
|
Large -> []
|
||||||
|
Small -> bypassSmudgeConfig
|
||||||
|
|
||||||
start :: AddOptions -> SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart
|
start :: AddOptions -> SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart
|
||||||
start o si file addunlockedmatcher = do
|
start o si file addunlockedmatcher = do
|
||||||
|
@ -164,7 +150,7 @@ start o si file addunlockedmatcher = do
|
||||||
| otherwise ->
|
| otherwise ->
|
||||||
starting "add" (ActionItemWorkTreeFile file) si $
|
starting "add" (ActionItemWorkTreeFile file) si $
|
||||||
if isSymbolicLink s
|
if isSymbolicLink s
|
||||||
then next $ addFile (checkGitIgnoreOption o) file
|
then next $ addFile Small (checkGitIgnoreOption o) file
|
||||||
else perform o file addunlockedmatcher
|
else perform o file addunlockedmatcher
|
||||||
addpresent key =
|
addpresent key =
|
||||||
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
|
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
|
||||||
|
@ -180,7 +166,7 @@ start o si file addunlockedmatcher = do
|
||||||
starting "add" (ActionItemWorkTreeFile file) si $
|
starting "add" (ActionItemWorkTreeFile file) si $
|
||||||
addingExistingLink file key $ do
|
addingExistingLink file key $ do
|
||||||
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file)
|
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file)
|
||||||
next $ addFile (checkGitIgnoreOption o) file
|
next $ addFile Large (checkGitIgnoreOption o) file
|
||||||
|
|
||||||
perform :: AddOptions -> RawFilePath -> AddUnlockedMatcher -> CommandPerform
|
perform :: AddOptions -> RawFilePath -> AddUnlockedMatcher -> CommandPerform
|
||||||
perform o file addunlockedmatcher = withOtherTmp $ \tmpdir -> do
|
perform o file addunlockedmatcher = withOtherTmp $ \tmpdir -> do
|
||||||
|
|
|
@ -60,3 +60,11 @@ deconfigureSmudgeFilter = do
|
||||||
filter (\l -> l `notElem` stdattr && not (null l)) ls
|
filter (\l -> l `notElem` stdattr && not (null l)) ls
|
||||||
unsetConfig (ConfigKey "filter.annex.smudge")
|
unsetConfig (ConfigKey "filter.annex.smudge")
|
||||||
unsetConfig (ConfigKey "filter.annex.clean")
|
unsetConfig (ConfigKey "filter.annex.clean")
|
||||||
|
|
||||||
|
-- Params to pass to git to temporarily avoid using the annex smudge/clean
|
||||||
|
-- filters.
|
||||||
|
bypassSmudgeConfig :: [CommandParam]
|
||||||
|
bypassSmudgeConfig = map Param
|
||||||
|
[ "-c", "filter.annex.smudge="
|
||||||
|
, "-c", "filter.annex.clean="
|
||||||
|
]
|
||||||
|
|
|
@ -43,6 +43,7 @@ import Git.FilePath
|
||||||
import Git.Command
|
import Git.Command
|
||||||
import Git.Types
|
import Git.Types
|
||||||
import Git.Index
|
import Git.Index
|
||||||
|
import Config.Smudge
|
||||||
import qualified Utility.RawFilePath as R
|
import qualified Utility.RawFilePath as R
|
||||||
|
|
||||||
import qualified Data.ByteString as S
|
import qualified Data.ByteString as S
|
||||||
|
@ -237,15 +238,14 @@ reconcileStaged qh = do
|
||||||
liftIO $ writeFile indexcache $ showInodeCache cur
|
liftIO $ writeFile indexcache $ showInodeCache cur
|
||||||
|
|
||||||
diff =
|
diff =
|
||||||
-- Avoid using external diff command, which would be slow.
|
|
||||||
-- (The -G option may make it be used otherwise.)
|
|
||||||
[ Param "-c", Param "diff.external="
|
|
||||||
-- Avoid running smudge or clean filters, since we want the
|
-- Avoid running smudge or clean filters, since we want the
|
||||||
-- raw output, and they would block trying to access the
|
-- raw output, and they would block trying to access the
|
||||||
-- locked database. The --raw normally avoids git diff
|
-- locked database. The --raw normally avoids git diff
|
||||||
-- running them, but older versions of git need this.
|
-- running them, but older versions of git need this.
|
||||||
, Param "-c", Param "filter.annex.smudge="
|
bypassSmudgeConfig ++
|
||||||
, Param "-c", Param "filter.annex.clean="
|
-- Avoid using external diff command, which would be slow.
|
||||||
|
-- (The -G option may make it be used otherwise.)
|
||||||
|
[ Param "-c", Param "diff.external="
|
||||||
, Param "diff"
|
, Param "diff"
|
||||||
, Param "--cached"
|
, Param "--cached"
|
||||||
, Param "--raw"
|
, Param "--raw"
|
||||||
|
|
|
@ -14,3 +14,5 @@ with the existing `--force-small` too, but at least that's not the default.
|
||||||
|
|
||||||
Possible alternate approach: Unsetting filter.annex.smudge and
|
Possible alternate approach: Unsetting filter.annex.smudge and
|
||||||
filter.annex.clean when running `git add`?
|
filter.annex.clean when running `git add`?
|
||||||
|
|
||||||
|
> This approach is a winner! [[done]] --[[Joey]]
|
||||||
|
|
Loading…
Reference in a new issue