From eb42bde19a17a93ecdc972f22384e4ce58e080c0 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 19 Sep 2013 14:48:42 -0400 Subject: [PATCH] sync, pre-commit, indirect: Avoid unnecessarily catting non-symlink files from git, which can be so large it runs out of memory. --- Annex/Direct.hs | 8 +++++--- Command/Indirect.hs | 5 +++-- Command/PreCommit.hs | 7 ++++--- Git/LsFiles.hs | 15 ++++++++++----- Logs/Web.hs | 3 ++- debian/changelog | 2 ++ 6 files changed, 26 insertions(+), 14 deletions(-) diff --git a/Annex/Direct.hs b/Annex/Direct.hs index b66b2fdfd8..a6c30ad08d 100644 --- a/Annex/Direct.hs +++ b/Annex/Direct.hs @@ -14,8 +14,8 @@ import qualified Git.Merge import qualified Git.DiffTree as DiffTree import Git.Sha import Git.Types -import Annex.CatFile import Git.FileMode +import Annex.CatFile import qualified Annex.Queue import Logs.Location import Backend @@ -45,8 +45,10 @@ stageDirect = do {- Determine what kind of modified or deleted file this is, as - efficiently as we can, by getting any key that's associated - with it in git, as well as its stat info. -} - go (file, Just sha) = do - shakey <- catKey sha + go (file, Just sha, Just mode) = do + shakey <- if isSymLink mode + then catKey sha + else return Nothing mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file filekey <- isAnnexLink file case (shakey, filekey, mstat, toInodeCache =<< mstat) of diff --git a/Command/Indirect.hs b/Command/Indirect.hs index e63c4cb8a6..79e736d11f 100644 --- a/Command/Indirect.hs +++ b/Command/Indirect.hs @@ -14,6 +14,7 @@ import Command import qualified Git import qualified Git.Command import qualified Git.LsFiles +import Git.FileMode import Config import qualified Annex import Annex.Direct @@ -67,8 +68,7 @@ perform = do {- Walk tree from top and move all present direct mode files into - the annex, replacing with symlinks. Also delete direct mode - caches and mappings. -} - go (_, Nothing) = noop - go (f, Just sha) = do + go (f, Just sha, Just mode) | isSymLink mode = do r <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus f case r of Just s @@ -80,6 +80,7 @@ perform = do maybe noop (fromdirect f) =<< catKey sha _ -> noop + go _ = noop fromdirect f k = do showStart "indirect" f diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index c6d9dd278a..c35cf61e15 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -16,6 +16,7 @@ import qualified Git.Ref import Annex.CatFile import Annex.Content.Direct import Git.Sha +import Git.FileMode def :: [Command] def = [command "pre-commit" paramPaths seek SectionPlumbing @@ -44,10 +45,10 @@ startDirect _ = next $ do next $ liftIO clean where go diff = do - withkey (Git.DiffTree.srcsha diff) removeAssociatedFile - withkey (Git.DiffTree.dstsha diff) addAssociatedFile + withkey (Git.DiffTree.srcsha diff) (Git.DiffTree.srcmode diff) removeAssociatedFile + withkey (Git.DiffTree.dstsha diff) (Git.DiffTree.dstmode diff) addAssociatedFile where - withkey sha a = when (sha /= nullSha) $ do + withkey sha mode a = when (sha /= nullSha && isSymLink mode) $ do k <- catKey sha case k of Nothing -> noop diff --git a/Git/LsFiles.hs b/Git/LsFiles.hs index e2e29ea36f..8a5d4bd6a2 100644 --- a/Git/LsFiles.hs +++ b/Git/LsFiles.hs @@ -28,6 +28,9 @@ import Git.Command import Git.Types import Git.Sha +import Numeric +import System.Posix.Types + {- Scans for files that are checked into git at the specified locations. -} inRepo :: [FilePath] -> Repo -> IO ([FilePath], IO Bool) inRepo l = pipeNullSplit $ Params "ls-files --cached -z --" : map File l @@ -78,16 +81,16 @@ staged' ps l = pipeNullSplit $ prefix ++ ps ++ suffix {- Returns details about files that are staged in the index, - as well as files not yet in git. Skips ignored files. -} -stagedOthersDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha)], IO Bool) +stagedOthersDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha, Maybe FileMode)], IO Bool) stagedOthersDetails = stagedDetails' [Params "--others --exclude-standard"] {- Returns details about all files that are staged in the index. -} -stagedDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha)], IO Bool) +stagedDetails :: [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha, Maybe FileMode)], IO Bool) stagedDetails = stagedDetails' [] {- Gets details about staged files, including the Sha of their staged - contents. -} -stagedDetails' :: [CommandParam] -> [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha)], IO Bool) +stagedDetails' :: [CommandParam] -> [FilePath] -> Repo -> IO ([(FilePath, Maybe Sha, Maybe FileMode)], IO Bool) stagedDetails' ps l repo = do (ls, cleanup) <- pipeNullSplit params repo return (map parse ls, cleanup) @@ -95,10 +98,12 @@ stagedDetails' ps l repo = do params = Params "ls-files --stage -z" : ps ++ Param "--" : map File l parse s - | null file = (s, Nothing) - | otherwise = (file, extractSha $ take shaSize $ drop 7 metadata) + | null file = (s, Nothing, Nothing) + | otherwise = (file, extractSha $ take shaSize rest, readmode mode) where (metadata, file) = separate (== '\t') s + (mode, rest) = separate (== ' ') metadata + readmode = headMaybe . readOct >=*> fst {- Returns a list of the files in the specified locations that are staged - for commit, and whose type has changed. -} diff --git a/Logs/Web.hs b/Logs/Web.hs index 0239f23350..ede600ec28 100644 --- a/Logs/Web.hs +++ b/Logs/Web.hs @@ -18,6 +18,7 @@ module Logs.Web ( ) where import qualified Data.ByteString.Lazy.Char8 as L +import Data.Tuple.Utils import Common.Annex import Logs @@ -70,7 +71,7 @@ knownUrls = do Annex.Branch.withIndex $ do top <- fromRepo Git.repoPath (l, cleanup) <- inRepo $ Git.LsFiles.stagedDetails [top] - r <- mapM (geturls . snd) $ filter (isUrlLog . fst) l + r <- mapM (geturls . snd3) $ filter (isUrlLog . fst3) l void $ liftIO cleanup return $ concat r where diff --git a/debian/changelog b/debian/changelog index 1fd1375189..6cc20e4128 100644 --- a/debian/changelog +++ b/debian/changelog @@ -18,6 +18,8 @@ git-annex (4.20130912) UNRELEASED; urgency=low numcopies levels. (--fast avoids calculating these) * gcrypt: Ensure that signing key is set to one of the participants keys. * webapp: Show encryption information when editing a remote. + * sync, pre-commit, indirect: Avoid unnecessarily catting non-symlink + files from git, which can be so large it runs out of memory. -- Joey Hess Thu, 12 Sep 2013 12:14:46 -0400