sync, pre-commit, indirect: Avoid unnecessarily catting non-symlink files from git, which can be so large it runs out of memory.

This commit is contained in:
Joey Hess 2013-09-19 14:48:42 -04:00
parent 66b6a9cc4e
commit eb42bde19a
6 changed files with 26 additions and 14 deletions

View file

@ -14,8 +14,8 @@ import qualified Git.Merge
import qualified Git.DiffTree as DiffTree
import Git.Sha
import Git.Types
import Annex.CatFile
import Git.FileMode
import Annex.CatFile
import qualified Annex.Queue
import Logs.Location
import Backend
@ -45,8 +45,10 @@ stageDirect = do
{- Determine what kind of modified or deleted file this is, as
- efficiently as we can, by getting any key that's associated
- with it in git, as well as its stat info. -}
go (file, Just sha) = do
shakey <- catKey sha
go (file, Just sha, Just mode) = do
shakey <- if isSymLink mode
then catKey sha
else return Nothing
mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file
filekey <- isAnnexLink file
case (shakey, filekey, mstat, toInodeCache =<< mstat) of

View file

@ -14,6 +14,7 @@ import Command
import qualified Git
import qualified Git.Command
import qualified Git.LsFiles
import Git.FileMode
import Config
import qualified Annex
import Annex.Direct
@ -67,8 +68,7 @@ perform = do
{- Walk tree from top and move all present direct mode files into
- the annex, replacing with symlinks. Also delete direct mode
- caches and mappings. -}
go (_, Nothing) = noop
go (f, Just sha) = do
go (f, Just sha, Just mode) | isSymLink mode = do
r <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus f
case r of
Just s
@ -80,6 +80,7 @@ perform = do
maybe noop (fromdirect f)
=<< catKey sha
_ -> noop
go _ = noop
fromdirect f k = do
showStart "indirect" f

View file

@ -16,6 +16,7 @@ import qualified Git.Ref
import Annex.CatFile
import Annex.Content.Direct
import Git.Sha
import Git.FileMode
def :: [Command]
def = [command "pre-commit" paramPaths seek SectionPlumbing
@ -44,10 +45,10 @@ startDirect _ = next $ do
next $ liftIO clean
where
go diff = do
withkey (Git.DiffTree.srcsha diff) removeAssociatedFile
withkey (Git.DiffTree.dstsha diff) addAssociatedFile
withkey (Git.DiffTree.srcsha diff) (Git.DiffTree.srcmode diff) removeAssociatedFile
withkey (Git.DiffTree.dstsha diff) (Git.DiffTree.dstmode diff) addAssociatedFile
where
withkey sha a = when (sha /= nullSha) $ do
withkey sha mode a = when (sha /= nullSha && isSymLink mode) $ do
k <- catKey sha
case k of
Nothing -> noop

View file

@ -28,6 +28,9 @@ import Git.Command
import Git.Types
import Git.Sha
import Numeric
import System.Posix.Types
{- Scans for files that are checked into git at the specified locations. -}
inRepo :: [FilePath] -> Repo -> IO ([FilePath], IO Bool)
inRepo l = pipeNullSplit $ Params "ls-files --cached -z --" : map File l
@ -78,16 +81,16 @@ staged' ps l = pipeNullSplit $ prefix ++ ps ++ suffix
{- Returns details about files that are staged in the index,
- as well as files not yet in git. Skips ignored files. -}
stagedOthersDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha)], IO Bool)
stagedOthersDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha, Maybe FileMode)], IO Bool)
stagedOthersDetails = stagedDetails' [Params "--others --exclude-standard"]
{- Returns details about all files that are staged in the index. -}
stagedDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha)], IO Bool)
stagedDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha, Maybe FileMode)], IO Bool)
stagedDetails = stagedDetails' []
{- Gets details about staged files, including the Sha of their staged
- contents. -}
stagedDetails' :: [CommandParam] -> [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha)], IO Bool)
stagedDetails' :: [CommandParam] -> [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha, Maybe FileMode)], IO Bool)
stagedDetails' ps l repo = do
(ls, cleanup) <- pipeNullSplit params repo
return (map parse ls, cleanup)
@ -95,10 +98,12 @@ stagedDetails' ps l repo = do
params = Params "ls-files --stage -z" : ps ++
Param "--" : map File l
parse s
| null file = (s, Nothing)
| otherwise = (file, extractSha $ take shaSize $ drop 7 metadata)
| null file = (s, Nothing, Nothing)
| otherwise = (file, extractSha $ take shaSize rest, readmode mode)
where
(metadata, file) = separate (== '\t') s
(mode, rest) = separate (== ' ') metadata
readmode = headMaybe . readOct >=*> fst
{- Returns a list of the files in the specified locations that are staged
- for commit, and whose type has changed. -}

View file

@ -18,6 +18,7 @@ module Logs.Web (
) where
import qualified Data.ByteString.Lazy.Char8 as L
import Data.Tuple.Utils
import Common.Annex
import Logs
@ -70,7 +71,7 @@ knownUrls = do
Annex.Branch.withIndex $ do
top <- fromRepo Git.repoPath
(l, cleanup) <- inRepo $ Git.LsFiles.stagedDetails [top]
r <- mapM (geturls . snd) $ filter (isUrlLog . fst) l
r <- mapM (geturls . snd3) $ filter (isUrlLog . fst3) l
void $ liftIO cleanup
return $ concat r
where

2
debian/changelog vendored
View file

@ -18,6 +18,8 @@ git-annex (4.20130912) UNRELEASED; urgency=low
numcopies levels. (--fast avoids calculating these)
* gcrypt: Ensure that signing key is set to one of the participants keys.
* webapp: Show encryption information when editing a remote.
* sync, pre-commit, indirect: Avoid unnecessarily catting non-symlink
files from git, which can be so large it runs out of memory.
-- Joey Hess <joeyh@debian.org> Thu, 12 Sep 2013 12:14:46 -0400