more completely solve catKey memory leak
Done using a mode witness, which ensures it's fixed everywhere. Fixing catFileKey was a bear, because git cat-file does not provide a nice way to query for the mode of a file and there is no other efficient way to do it. Oh, for libgit2.. Note that I am looking at tree objects from HEAD, rather than the index. Because I cat-file cannot show a tree object for the index. So this fix is technically incomplete. The only cases where it matters are: 1. A new large file has been directly staged in git, but not committed. 2. A file that was committed to HEAD as a symlink has been staged directly in the index. This could be fixed a lot better using libgit2.
This commit is contained in:
parent
f26c996dc6
commit
006cf7976f
10 changed files with 71 additions and 27 deletions
|
@ -8,6 +8,7 @@
|
||||||
module Annex.CatFile (
|
module Annex.CatFile (
|
||||||
catFile,
|
catFile,
|
||||||
catObject,
|
catObject,
|
||||||
|
catTree,
|
||||||
catObjectDetails,
|
catObjectDetails,
|
||||||
catFileHandle,
|
catFileHandle,
|
||||||
catKey,
|
catKey,
|
||||||
|
@ -17,6 +18,7 @@ module Annex.CatFile (
|
||||||
|
|
||||||
import qualified Data.ByteString.Lazy as L
|
import qualified Data.ByteString.Lazy as L
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
import System.PosixCompat.Types
|
||||||
|
|
||||||
import Common.Annex
|
import Common.Annex
|
||||||
import qualified Git
|
import qualified Git
|
||||||
|
@ -24,6 +26,7 @@ import qualified Git.CatFile
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import Git.Types
|
import Git.Types
|
||||||
import Git.FilePath
|
import Git.FilePath
|
||||||
|
import Git.FileMode
|
||||||
|
|
||||||
catFile :: Git.Branch -> FilePath -> Annex L.ByteString
|
catFile :: Git.Branch -> FilePath -> Annex L.ByteString
|
||||||
catFile branch file = do
|
catFile branch file = do
|
||||||
|
@ -35,6 +38,11 @@ catObject ref = do
|
||||||
h <- catFileHandle
|
h <- catFileHandle
|
||||||
liftIO $ Git.CatFile.catObject h ref
|
liftIO $ Git.CatFile.catObject h ref
|
||||||
|
|
||||||
|
catTree :: Git.Ref -> Annex [(FilePath, FileMode)]
|
||||||
|
catTree ref = do
|
||||||
|
h <- catFileHandle
|
||||||
|
liftIO $ Git.CatFile.catTree h ref
|
||||||
|
|
||||||
catObjectDetails :: Git.Ref -> Annex (Maybe (L.ByteString, Sha))
|
catObjectDetails :: Git.Ref -> Annex (Maybe (L.ByteString, Sha))
|
||||||
catObjectDetails ref = do
|
catObjectDetails ref = do
|
||||||
h <- catFileHandle
|
h <- catFileHandle
|
||||||
|
@ -55,13 +63,39 @@ catFileHandle = do
|
||||||
Annex.changeState $ \s -> s { Annex.catfilehandles = m' }
|
Annex.changeState $ \s -> s { Annex.catfilehandles = m' }
|
||||||
return h
|
return h
|
||||||
|
|
||||||
{- From the Sha or Ref of a symlink back to the key. -}
|
{- From the Sha or Ref of a symlink back to the key.
|
||||||
catKey :: Ref -> Annex (Maybe Key)
|
-
|
||||||
catKey ref = do
|
- Requires a mode witness, to guarantee that the file is a symlink.
|
||||||
l <- fromInternalGitPath . encodeW8 . L.unpack <$> catObject ref
|
-}
|
||||||
return $ if isLinkToAnnex l
|
catKey :: Ref -> FileMode -> Annex (Maybe Key)
|
||||||
then fileKey $ takeFileName l
|
catKey ref mode
|
||||||
else Nothing
|
| isSymLink mode = do
|
||||||
|
l <- fromInternalGitPath . encodeW8 . L.unpack <$> catObject ref
|
||||||
|
return $ if isLinkToAnnex l
|
||||||
|
then fileKey $ takeFileName l
|
||||||
|
else Nothing
|
||||||
|
| otherwise = return Nothing
|
||||||
|
|
||||||
|
{- Looks up the file mode corresponding to the Ref using the running
|
||||||
|
- cat-file.
|
||||||
|
-
|
||||||
|
- Currently this always has to look in HEAD, because cat-file --batch
|
||||||
|
- does not offer a way to specify that we want to look up a tree object
|
||||||
|
- in the index. So if the index has a file staged not as a symlink,
|
||||||
|
- and it is a sylink in head, the wrong mode is gotten. This is a bug.
|
||||||
|
- Also, we have to assume the file is a symlink if it's not yet committed
|
||||||
|
- to HEAD.
|
||||||
|
-}
|
||||||
|
catKeyChecked :: Bool -> Ref -> Annex (Maybe Key)
|
||||||
|
catKeyChecked needhead ref@(Ref r) =
|
||||||
|
catKey ref =<< findmode <$> catTree treeref
|
||||||
|
where
|
||||||
|
pathparts = split "/" r
|
||||||
|
dir = intercalate "/" $ take (length pathparts - 1) pathparts
|
||||||
|
file = fromMaybe "" $ lastMaybe pathparts
|
||||||
|
treeref = Ref $ if needhead then "HEAD" ++ dir ++ "/" else dir ++ "/"
|
||||||
|
findmode = fromMaybe symLinkMode . headMaybe .
|
||||||
|
map snd . filter (\p -> fst p == file)
|
||||||
|
|
||||||
{- From a file in the repository back to the key.
|
{- From a file in the repository back to the key.
|
||||||
-
|
-
|
||||||
|
@ -76,7 +110,8 @@ catKey ref = do
|
||||||
-
|
-
|
||||||
- For command-line git-annex use, that doesn't matter. It's perfectly
|
- For command-line git-annex use, that doesn't matter. It's perfectly
|
||||||
- reasonable for things staged in the index after the currently running
|
- reasonable for things staged in the index after the currently running
|
||||||
- git-annex process to not be noticed by it.
|
- git-annex process to not be noticed by it. However, we do want to see
|
||||||
|
- what's in the index, since it may have uncommitted changes not in HEAD>
|
||||||
-
|
-
|
||||||
- For the assistant, this is much more of a problem, since it commits
|
- For the assistant, this is much more of a problem, since it commits
|
||||||
- files and then needs to be able to immediately look up their keys.
|
- files and then needs to be able to immediately look up their keys.
|
||||||
|
@ -89,8 +124,8 @@ catKey ref = do
|
||||||
catKeyFile :: FilePath -> Annex (Maybe Key)
|
catKeyFile :: FilePath -> Annex (Maybe Key)
|
||||||
catKeyFile f = ifM (Annex.getState Annex.daemon)
|
catKeyFile f = ifM (Annex.getState Annex.daemon)
|
||||||
( catKeyFileHEAD f
|
( catKeyFileHEAD f
|
||||||
, catKey $ Ref $ ":./" ++ f
|
, catKeyChecked True (Ref $ ":./" ++ f)
|
||||||
)
|
)
|
||||||
|
|
||||||
catKeyFileHEAD :: FilePath -> Annex (Maybe Key)
|
catKeyFileHEAD :: FilePath -> Annex (Maybe Key)
|
||||||
catKeyFileHEAD f = catKey $ Ref $ "HEAD:./" ++ f
|
catKeyFileHEAD f = catKeyChecked False (Ref $ "HEAD:./" ++ f)
|
||||||
|
|
|
@ -14,7 +14,6 @@ import qualified Git.Merge
|
||||||
import qualified Git.DiffTree as DiffTree
|
import qualified Git.DiffTree as DiffTree
|
||||||
import Git.Sha
|
import Git.Sha
|
||||||
import Git.Types
|
import Git.Types
|
||||||
import Git.FileMode
|
|
||||||
import Annex.CatFile
|
import Annex.CatFile
|
||||||
import qualified Annex.Queue
|
import qualified Annex.Queue
|
||||||
import Logs.Location
|
import Logs.Location
|
||||||
|
@ -46,9 +45,7 @@ stageDirect = do
|
||||||
- efficiently as we can, by getting any key that's associated
|
- efficiently as we can, by getting any key that's associated
|
||||||
- with it in git, as well as its stat info. -}
|
- with it in git, as well as its stat info. -}
|
||||||
go (file, Just sha, Just mode) = do
|
go (file, Just sha, Just mode) = do
|
||||||
shakey <- if isSymLink mode
|
shakey <- catKey sha mode
|
||||||
then catKey sha
|
|
||||||
else return Nothing
|
|
||||||
mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file
|
mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file
|
||||||
filekey <- isAnnexLink file
|
filekey <- isAnnexLink file
|
||||||
case (shakey, filekey, mstat, toInodeCache =<< mstat) of
|
case (shakey, filekey, mstat, toInodeCache =<< mstat) of
|
||||||
|
@ -149,10 +146,9 @@ mergeDirectCleanup d oldsha newsha = do
|
||||||
where
|
where
|
||||||
go getsha getmode a araw
|
go getsha getmode a araw
|
||||||
| getsha item == nullSha = noop
|
| getsha item == nullSha = noop
|
||||||
| isSymLink (getmode item) =
|
| otherwise =
|
||||||
maybe (araw f) (\k -> void $ a k f)
|
maybe (araw f) (\k -> void $ a k f)
|
||||||
=<< catKey (getsha item)
|
=<< catKey (getsha item) (getmode item)
|
||||||
| otherwise = araw f
|
|
||||||
f = DiffTree.file item
|
f = DiffTree.file item
|
||||||
|
|
||||||
moveout = removeDirect
|
moveout = removeDirect
|
||||||
|
|
|
@ -78,7 +78,7 @@ perform = do
|
||||||
return Nothing
|
return Nothing
|
||||||
| otherwise ->
|
| otherwise ->
|
||||||
maybe noop (fromdirect f)
|
maybe noop (fromdirect f)
|
||||||
=<< catKey sha
|
=<< catKey sha mode
|
||||||
_ -> noop
|
_ -> noop
|
||||||
go _ = noop
|
go _ = noop
|
||||||
|
|
||||||
|
|
|
@ -16,7 +16,6 @@ import qualified Git.Ref
|
||||||
import Annex.CatFile
|
import Annex.CatFile
|
||||||
import Annex.Content.Direct
|
import Annex.Content.Direct
|
||||||
import Git.Sha
|
import Git.Sha
|
||||||
import Git.FileMode
|
|
||||||
|
|
||||||
def :: [Command]
|
def :: [Command]
|
||||||
def = [command "pre-commit" paramPaths seek SectionPlumbing
|
def = [command "pre-commit" paramPaths seek SectionPlumbing
|
||||||
|
@ -48,8 +47,8 @@ startDirect _ = next $ do
|
||||||
withkey (Git.DiffTree.srcsha diff) (Git.DiffTree.srcmode diff) removeAssociatedFile
|
withkey (Git.DiffTree.srcsha diff) (Git.DiffTree.srcmode diff) removeAssociatedFile
|
||||||
withkey (Git.DiffTree.dstsha diff) (Git.DiffTree.dstmode diff) addAssociatedFile
|
withkey (Git.DiffTree.dstsha diff) (Git.DiffTree.dstmode diff) addAssociatedFile
|
||||||
where
|
where
|
||||||
withkey sha mode a = when (sha /= nullSha && isSymLink mode) $ do
|
withkey sha mode a = when (sha /= nullSha) $ do
|
||||||
k <- catKey sha
|
k <- catKey sha mode
|
||||||
case k of
|
case k of
|
||||||
Nothing -> noop
|
Nothing -> noop
|
||||||
Just key -> void $ a key (Git.DiffTree.file diff)
|
Just key -> void $ a key (Git.DiffTree.file diff)
|
||||||
|
|
|
@ -29,6 +29,7 @@ import qualified Remote.Git
|
||||||
import Types.Key
|
import Types.Key
|
||||||
import Config
|
import Config
|
||||||
import Annex.ReplaceFile
|
import Annex.ReplaceFile
|
||||||
|
import Git.FileMode
|
||||||
|
|
||||||
import Data.Hash.MD5
|
import Data.Hash.MD5
|
||||||
|
|
||||||
|
@ -321,7 +322,7 @@ resolveMerge' u
|
||||||
case msha of
|
case msha of
|
||||||
Nothing -> a Nothing
|
Nothing -> a Nothing
|
||||||
Just sha -> do
|
Just sha -> do
|
||||||
key <- catKey sha
|
key <- catKey sha symLinkMode
|
||||||
maybe (return False) (a . Just) key
|
maybe (return False) (a . Just) key
|
||||||
|
|
||||||
{- The filename to use when resolving a conflicted merge of a file,
|
{- The filename to use when resolving a conflicted merge of a file,
|
||||||
|
|
|
@ -130,4 +130,4 @@ catTree h treeref = go <$> catObjectDetails h treeref
|
||||||
parsemodefile b =
|
parsemodefile b =
|
||||||
let (modestr, file) = separate (== ' ') (encodeW8 $ L.unpack b)
|
let (modestr, file) = separate (== ' ') (encodeW8 $ L.unpack b)
|
||||||
in (file, readmode modestr)
|
in (file, readmode modestr)
|
||||||
readmode = fst . Prelude.head . readOct
|
readmode = fst . fromMaybe (0, undefined) . headMaybe . readOct
|
||||||
|
|
|
@ -13,8 +13,11 @@ import Utility.FileMode
|
||||||
|
|
||||||
import System.PosixCompat.Types
|
import System.PosixCompat.Types
|
||||||
|
|
||||||
|
symLinkMode :: FileMode
|
||||||
|
symLinkMode = 40960
|
||||||
|
|
||||||
{- Git uses a special file mode to indicate a symlink. This is the case
|
{- Git uses a special file mode to indicate a symlink. This is the case
|
||||||
- even on Windows, so we hard code the valuse here, rather than using
|
- even on Windows, so we hard code the valuse here, rather than using
|
||||||
- System.Posix.Files.symbolicLinkMode. -}
|
- System.Posix.Files.symbolicLinkMode. -}
|
||||||
isSymLink :: FileMode -> Bool
|
isSymLink :: FileMode -> Bool
|
||||||
isSymLink = checkMode 40960
|
isSymLink = checkMode symLinkMode
|
||||||
|
|
4
debian/changelog
vendored
4
debian/changelog
vendored
|
@ -18,8 +18,8 @@ git-annex (4.20130912) UNRELEASED; urgency=low
|
||||||
numcopies levels. (--fast avoids calculating these)
|
numcopies levels. (--fast avoids calculating these)
|
||||||
* gcrypt: Ensure that signing key is set to one of the participants keys.
|
* gcrypt: Ensure that signing key is set to one of the participants keys.
|
||||||
* webapp: Show encryption information when editing a remote.
|
* webapp: Show encryption information when editing a remote.
|
||||||
* sync, pre-commit, indirect: Avoid unnecessarily catting non-symlink
|
* Avoid unnecessarily catting non-symlink files from git, which can be
|
||||||
files from git, which can be so large it runs out of memory.
|
so large it runs out of memory.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Thu, 12 Sep 2013 12:14:46 -0400
|
-- Joey Hess <joeyh@debian.org> Thu, 12 Sep 2013 12:14:46 -0400
|
||||||
|
|
||||||
|
|
|
@ -61,3 +61,9 @@ commit git-annex: out of memory (requested 985661440 bytes)
|
||||||
|
|
||||||
# End of transcript or log.
|
# End of transcript or log.
|
||||||
"""]]
|
"""]]
|
||||||
|
|
||||||
|
> [[fixed|done]]. However, if you saw this behavior,
|
||||||
|
> you have large files checked directly into git. You may
|
||||||
|
> want to examine your repository and use git filter-branch to clean
|
||||||
|
> it up.
|
||||||
|
> --[[Joey]]
|
||||||
|
|
|
@ -77,4 +77,8 @@ Any thoughts on how I can get git-annex (esp. fsck) to complete would be appreci
|
||||||
Thanks
|
Thanks
|
||||||
Giovanni
|
Giovanni
|
||||||
|
|
||||||
[[!tag moreinfo]]
|
> [[fixed|done]]. However, if you saw this behavior,
|
||||||
|
> you have large files checked directly into git. You may
|
||||||
|
> want to examine your repository and use git filter-branch to clean
|
||||||
|
> it up.
|
||||||
|
> --[[Joey]]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue