add: Significantly speed up adding lots of non-large files to git
* add: Significantly speed up adding lots of non-large files to git, by disabling the annex smudge filter when running git add. * add --force-small: Run git add rather than updating the index itself, so that any enabled smudge filters other than the annex one will be used.
This commit is contained in:
parent
0b2b666a38
commit
5ce61c6b2a
6 changed files with 43 additions and 40 deletions
|
@ -20,6 +20,7 @@ import Git.Index
|
|||
import Git.Env
|
||||
import qualified Annex
|
||||
import qualified Annex.Queue
|
||||
import Config.Smudge
|
||||
|
||||
{- Runs an action using a different git index file. -}
|
||||
withIndexFile :: AltIndexFile -> (FilePath -> Annex a) -> Annex a
|
||||
|
@ -67,16 +68,12 @@ withIndexFile i = withAltRepo usecachedgitenv restoregitenv
|
|||
- Smudge and clean filters are disabled in this work tree. -}
|
||||
withWorkTree :: FilePath -> Annex a -> Annex a
|
||||
withWorkTree d a = withAltRepo
|
||||
(\g -> return $ (g { location = modlocation (location g), gitGlobalOpts = gitGlobalOpts g ++ disableSmudgeConfig }, ()))
|
||||
(\g -> return $ (g { location = modlocation (location g), gitGlobalOpts = gitGlobalOpts g ++ bypassSmudgeConfig }, ()))
|
||||
(\g g' -> g' { location = location g, gitGlobalOpts = gitGlobalOpts g })
|
||||
(const a)
|
||||
where
|
||||
modlocation l@(Local {}) = l { worktree = Just (toRawFilePath d) }
|
||||
modlocation _ = error "withWorkTree of non-local git repo"
|
||||
disableSmudgeConfig = map Param
|
||||
[ "-c", "filter.annex.smudge="
|
||||
, "-c", "filter.annex.clean="
|
||||
]
|
||||
|
||||
{- Runs an action with the git index file and HEAD, and a few other
|
||||
- files that are related to the work tree coming from an overlay
|
||||
|
|
10
CHANGELOG
10
CHANGELOG
|
@ -1,3 +1,13 @@
|
|||
git-annex (8.20201130) UNRELEASED; urgency=medium
|
||||
|
||||
* add: Significantly speed up adding lots of non-large files to git,
|
||||
by disabling the annex smudge filter when running git add.
|
||||
* add --force-small: Run git add rather than updating the index itself,
|
||||
so that any enabled smudge filters other than the annex one will
|
||||
be used.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Mon, 04 Jan 2021 12:52:41 -0400
|
||||
|
||||
git-annex (8.20201129) upstream; urgency=medium
|
||||
|
||||
* New borg special remote. This is a new kind of remote, that examines
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex command
|
||||
-
|
||||
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -17,13 +17,10 @@ import qualified Database.Keys
|
|||
import Annex.FileMatcher
|
||||
import Annex.Link
|
||||
import Annex.Tmp
|
||||
import Annex.HashObject
|
||||
import Messages.Progress
|
||||
import Git.Types
|
||||
import Git.FilePath
|
||||
import Config.GitConfig
|
||||
import qualified Git.UpdateIndex
|
||||
import Utility.FileMode
|
||||
import Config.Smudge
|
||||
import Utility.OptParse
|
||||
import qualified Utility.RawFilePath as R
|
||||
|
||||
|
@ -119,37 +116,26 @@ startSmall o si file =
|
|||
addSmall :: CheckGitIgnore -> RawFilePath -> Annex Bool
|
||||
addSmall ci file = do
|
||||
showNote "non-large file; adding content to git repository"
|
||||
addFile ci file
|
||||
addFile Small ci file
|
||||
|
||||
startSmallOverridden :: AddOptions -> SeekInput -> RawFilePath -> CommandStart
|
||||
startSmallOverridden o si file =
|
||||
starting "add" (ActionItemWorkTreeFile file) si $
|
||||
next $ addSmallOverridden o file
|
||||
|
||||
addSmallOverridden :: AddOptions -> RawFilePath -> Annex Bool
|
||||
addSmallOverridden o file = do
|
||||
starting "add" (ActionItemWorkTreeFile file) si $ next $ do
|
||||
showNote "adding content to git repository"
|
||||
s <- liftIO $ R.getSymbolicLinkStatus file
|
||||
if not (isRegularFile s)
|
||||
then addFile (checkGitIgnoreOption o) file
|
||||
else do
|
||||
-- Can't use addFile because the clean filter will
|
||||
-- honor annex.largefiles and it has been overridden.
|
||||
-- Instead, hash the file and add to the index.
|
||||
sha <- hashFile file
|
||||
let ty = if isExecutable (fileMode s)
|
||||
then TreeExecutable
|
||||
else TreeFile
|
||||
Annex.Queue.addUpdateIndex =<<
|
||||
inRepo (Git.UpdateIndex.stageFile sha ty (fromRawFilePath file))
|
||||
return True
|
||||
addFile Small (checkGitIgnoreOption o) file
|
||||
|
||||
addFile :: CheckGitIgnore -> RawFilePath -> Annex Bool
|
||||
addFile ci file = do
|
||||
data SmallOrLarge = Small | Large
|
||||
|
||||
addFile :: SmallOrLarge -> CheckGitIgnore -> RawFilePath -> Annex Bool
|
||||
addFile smallorlarge ci file = do
|
||||
ps <- gitAddParams ci
|
||||
Annex.Queue.addCommand [] "add" (ps++[Param "--"])
|
||||
Annex.Queue.addCommand cps "add" (ps++[Param "--"])
|
||||
[fromRawFilePath file]
|
||||
return True
|
||||
where
|
||||
cps = case smallorlarge of
|
||||
Large -> []
|
||||
Small -> bypassSmudgeConfig
|
||||
|
||||
start :: AddOptions -> SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart
|
||||
start o si file addunlockedmatcher = do
|
||||
|
@ -164,7 +150,7 @@ start o si file addunlockedmatcher = do
|
|||
| otherwise ->
|
||||
starting "add" (ActionItemWorkTreeFile file) si $
|
||||
if isSymbolicLink s
|
||||
then next $ addFile (checkGitIgnoreOption o) file
|
||||
then next $ addFile Small (checkGitIgnoreOption o) file
|
||||
else perform o file addunlockedmatcher
|
||||
addpresent key =
|
||||
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
|
||||
|
@ -180,7 +166,7 @@ start o si file addunlockedmatcher = do
|
|||
starting "add" (ActionItemWorkTreeFile file) si $
|
||||
addingExistingLink file key $ do
|
||||
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file)
|
||||
next $ addFile (checkGitIgnoreOption o) file
|
||||
next $ addFile Large (checkGitIgnoreOption o) file
|
||||
|
||||
perform :: AddOptions -> RawFilePath -> AddUnlockedMatcher -> CommandPerform
|
||||
perform o file addunlockedmatcher = withOtherTmp $ \tmpdir -> do
|
||||
|
|
|
@ -60,3 +60,11 @@ deconfigureSmudgeFilter = do
|
|||
filter (\l -> l `notElem` stdattr && not (null l)) ls
|
||||
unsetConfig (ConfigKey "filter.annex.smudge")
|
||||
unsetConfig (ConfigKey "filter.annex.clean")
|
||||
|
||||
-- Params to pass to git to temporarily bypass git-annex's smudge/clean
|
||||
-- filters (filter.annex.*); other configured filters are unaffected.
|
||||
bypassSmudgeConfig :: [CommandParam]
|
||||
bypassSmudgeConfig = map Param
|
||||
[ "-c", "filter.annex.smudge="
|
||||
, "-c", "filter.annex.clean="
|
||||
]
|
||||
|
|
|
@ -43,6 +43,7 @@ import Git.FilePath
|
|||
import Git.Command
|
||||
import Git.Types
|
||||
import Git.Index
|
||||
import Config.Smudge
|
||||
import qualified Utility.RawFilePath as R
|
||||
|
||||
import qualified Data.ByteString as S
|
||||
|
@ -237,15 +238,14 @@ reconcileStaged qh = do
|
|||
liftIO $ writeFile indexcache $ showInodeCache cur
|
||||
|
||||
diff =
|
||||
-- Avoid using external diff command, which would be slow.
|
||||
-- (The -G option may make it be used otherwise.)
|
||||
[ Param "-c", Param "diff.external="
|
||||
-- Avoid running smudge or clean filters, since we want the
|
||||
-- raw output, and they would block trying to access the
|
||||
-- locked database. The --raw normally avoids git diff
|
||||
-- running them, but older versions of git need this.
|
||||
, Param "-c", Param "filter.annex.smudge="
|
||||
, Param "-c", Param "filter.annex.clean="
|
||||
bypassSmudgeConfig ++
|
||||
-- Avoid using external diff command, which would be slow.
|
||||
-- (The -G option may make it be used otherwise.)
|
||||
[ Param "-c", Param "diff.external="
|
||||
, Param "diff"
|
||||
, Param "--cached"
|
||||
, Param "--raw"
|
||||
|
|
|
@ -14,3 +14,5 @@ with the existing `--force-small` too, but at least that's not the default.
|
|||
|
||||
Possible alternate approach: Unsetting filter.annex.smudge and
|
||||
filter.annex.clean when running `git add`?
|
||||
|
||||
> This approach is a winner! [[done]] --[[Joey]]
|
||||
|
|
Loading…
Reference in a new issue