sync, pre-commit, indirect: Avoid unnecessarily catting non-symlink files from git, which can be so large it runs out of memory.

This commit is contained in:
Joey Hess 2013-09-19 14:48:42 -04:00
parent 66b6a9cc4e
commit eb42bde19a
6 changed files with 26 additions and 14 deletions

View file

@ -14,8 +14,8 @@ import qualified Git.Merge
import qualified Git.DiffTree as DiffTree import qualified Git.DiffTree as DiffTree
import Git.Sha import Git.Sha
import Git.Types import Git.Types
import Annex.CatFile
import Git.FileMode import Git.FileMode
import Annex.CatFile
import qualified Annex.Queue import qualified Annex.Queue
import Logs.Location import Logs.Location
import Backend import Backend
@ -45,8 +45,10 @@ stageDirect = do
{- Determine what kind of modified or deleted file this is, as {- Determine what kind of modified or deleted file this is, as
- efficiently as we can, by getting any key that's associated - efficiently as we can, by getting any key that's associated
- with it in git, as well as its stat info. -} - with it in git, as well as its stat info. -}
go (file, Just sha) = do go (file, Just sha, Just mode) = do
shakey <- catKey sha shakey <- if isSymLink mode
then catKey sha
else return Nothing
mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file
filekey <- isAnnexLink file filekey <- isAnnexLink file
case (shakey, filekey, mstat, toInodeCache =<< mstat) of case (shakey, filekey, mstat, toInodeCache =<< mstat) of

View file

@ -14,6 +14,7 @@ import Command
import qualified Git import qualified Git
import qualified Git.Command import qualified Git.Command
import qualified Git.LsFiles import qualified Git.LsFiles
import Git.FileMode
import Config import Config
import qualified Annex import qualified Annex
import Annex.Direct import Annex.Direct
@ -67,8 +68,7 @@ perform = do
{- Walk tree from top and move all present direct mode files into {- Walk tree from top and move all present direct mode files into
- the annex, replacing with symlinks. Also delete direct mode - the annex, replacing with symlinks. Also delete direct mode
- caches and mappings. -} - caches and mappings. -}
go (_, Nothing) = noop go (f, Just sha, Just mode) | isSymLink mode = do
go (f, Just sha) = do
r <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus f r <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus f
case r of case r of
Just s Just s
@ -80,6 +80,7 @@ perform = do
maybe noop (fromdirect f) maybe noop (fromdirect f)
=<< catKey sha =<< catKey sha
_ -> noop _ -> noop
go _ = noop
fromdirect f k = do fromdirect f k = do
showStart "indirect" f showStart "indirect" f

View file

@ -16,6 +16,7 @@ import qualified Git.Ref
import Annex.CatFile import Annex.CatFile
import Annex.Content.Direct import Annex.Content.Direct
import Git.Sha import Git.Sha
import Git.FileMode
def :: [Command] def :: [Command]
def = [command "pre-commit" paramPaths seek SectionPlumbing def = [command "pre-commit" paramPaths seek SectionPlumbing
@ -44,10 +45,10 @@ startDirect _ = next $ do
next $ liftIO clean next $ liftIO clean
where where
go diff = do go diff = do
withkey (Git.DiffTree.srcsha diff) removeAssociatedFile withkey (Git.DiffTree.srcsha diff) (Git.DiffTree.srcmode diff) removeAssociatedFile
withkey (Git.DiffTree.dstsha diff) addAssociatedFile withkey (Git.DiffTree.dstsha diff) (Git.DiffTree.dstmode diff) addAssociatedFile
where where
withkey sha a = when (sha /= nullSha) $ do withkey sha mode a = when (sha /= nullSha && isSymLink mode) $ do
k <- catKey sha k <- catKey sha
case k of case k of
Nothing -> noop Nothing -> noop

View file

@ -28,6 +28,9 @@ import Git.Command
import Git.Types import Git.Types
import Git.Sha import Git.Sha
import Numeric
import System.Posix.Types
{- Scans for files that are checked into git at the specified locations. -} {- Scans for files that are checked into git at the specified locations. -}
inRepo :: [FilePath] -> Repo -> IO ([FilePath], IO Bool) inRepo :: [FilePath] -> Repo -> IO ([FilePath], IO Bool)
inRepo l = pipeNullSplit $ Params "ls-files --cached -z --" : map File l inRepo l = pipeNullSplit $ Params "ls-files --cached -z --" : map File l
@ -78,16 +81,16 @@ staged' ps l = pipeNullSplit $ prefix ++ ps ++ suffix
{- Returns details about files that are staged in the index, {- Returns details about files that are staged in the index,
- as well as files not yet in git. Skips ignored files. -} - as well as files not yet in git. Skips ignored files. -}
stagedOthersDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha)], IO Bool) stagedOthersDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha, Maybe FileMode)], IO Bool)
stagedOthersDetails = stagedDetails' [Params "--others --exclude-standard"] stagedOthersDetails = stagedDetails' [Params "--others --exclude-standard"]
{- Returns details about all files that are staged in the index. -} {- Returns details about all files that are staged in the index. -}
stagedDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha)], IO Bool) stagedDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha, Maybe FileMode)], IO Bool)
stagedDetails = stagedDetails' [] stagedDetails = stagedDetails' []
{- Gets details about staged files, including the Sha of their staged {- Gets details about staged files, including the Sha of their staged
- contents. -} - contents. -}
stagedDetails' :: [CommandParam] -> [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha)], IO Bool) stagedDetails' :: [CommandParam] -> [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha, Maybe FileMode)], IO Bool)
stagedDetails' ps l repo = do stagedDetails' ps l repo = do
(ls, cleanup) <- pipeNullSplit params repo (ls, cleanup) <- pipeNullSplit params repo
return (map parse ls, cleanup) return (map parse ls, cleanup)
@ -95,10 +98,12 @@ stagedDetails' ps l repo = do
params = Params "ls-files --stage -z" : ps ++ params = Params "ls-files --stage -z" : ps ++
Param "--" : map File l Param "--" : map File l
parse s parse s
| null file = (s, Nothing) | null file = (s, Nothing, Nothing)
| otherwise = (file, extractSha $ take shaSize $ drop 7 metadata) | otherwise = (file, extractSha $ take shaSize rest, readmode mode)
where where
(metadata, file) = separate (== '\t') s (metadata, file) = separate (== '\t') s
(mode, rest) = separate (== ' ') metadata
readmode = headMaybe . readOct >=*> fst
{- Returns a list of the files in the specified locations that are staged {- Returns a list of the files in the specified locations that are staged
- for commit, and whose type has changed. -} - for commit, and whose type has changed. -}

View file

@ -18,6 +18,7 @@ module Logs.Web (
) where ) where
import qualified Data.ByteString.Lazy.Char8 as L import qualified Data.ByteString.Lazy.Char8 as L
import Data.Tuple.Utils
import Common.Annex import Common.Annex
import Logs import Logs
@ -70,7 +71,7 @@ knownUrls = do
Annex.Branch.withIndex $ do Annex.Branch.withIndex $ do
top <- fromRepo Git.repoPath top <- fromRepo Git.repoPath
(l, cleanup) <- inRepo $ Git.LsFiles.stagedDetails [top] (l, cleanup) <- inRepo $ Git.LsFiles.stagedDetails [top]
r <- mapM (geturls . snd) $ filter (isUrlLog . fst) l r <- mapM (geturls . snd3) $ filter (isUrlLog . fst3) l
void $ liftIO cleanup void $ liftIO cleanup
return $ concat r return $ concat r
where where

2
debian/changelog vendored
View file

@ -18,6 +18,8 @@ git-annex (4.20130912) UNRELEASED; urgency=low
numcopies levels. (--fast avoids calculating these) numcopies levels. (--fast avoids calculating these)
* gcrypt: Ensure that signing key is set to one of the participants keys. * gcrypt: Ensure that signing key is set to one of the participants keys.
* webapp: Show encryption information when editing a remote. * webapp: Show encryption information when editing a remote.
* sync, pre-commit, indirect: Avoid unnecessarily catting non-symlink
files from git, which can be so large it runs out of memory.
-- Joey Hess <joeyh@debian.org> Thu, 12 Sep 2013 12:14:46 -0400 -- Joey Hess <joeyh@debian.org> Thu, 12 Sep 2013 12:14:46 -0400