34530e59d9
.. and have to be checked to see if they are a pointed to an annexed file. Cases where such memory use could occur included, but were not limited to: - git commit -a of a large unlocked file (in v5 mode) - git-annex adjust when a large file was checked into git directly Generally, any use of catKey was a potential problem. Fix by using git cat-file --batch-check to check size before catting. This adds another git batch process, which is included in the CatFileHandle for simplicity. There could be performance impact, anywhere catKey is used. Particularly likely to affect adjusted branch generation speed, and operations on unlocked files in v6 mode. Hopefully since the --batch-check and --batch read the same data, disk buffering will avoid most overhead. Leaving only the overhead of talking to the process over the pipe and whatever computation --batch-check needs to do. This commit was sponsored by Bruno BEAUFILS on Patreon.
192 lines
6 KiB
Haskell
192 lines
6 KiB
Haskell
{- git-annex links to content
|
|
-
|
|
- On file systems that support them, symlinks are used.
|
|
-
|
|
- On other filesystems, git instead stores the symlink target in a regular
|
|
- file.
|
|
-
|
|
- Pointer files are used instead of symlinks for unlocked files.
|
|
-
|
|
- Copyright 2013-2015 Joey Hess <id@joeyh.name>
|
|
-
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
-}
|
|
|
|
{-# LANGUAGE CPP, BangPatterns #-}
|
|
|
|
module Annex.Link where
|
|
|
|
import Annex.Common
|
|
import qualified Annex
|
|
import qualified Git.UpdateIndex
|
|
import qualified Annex.Queue
|
|
import Git.Types
|
|
import Git.FilePath
|
|
import Annex.HashObject
|
|
import Utility.FileMode
|
|
|
|
import qualified Data.ByteString.Lazy as L
|
|
|
|
type LinkTarget = String
|
|
|
|
{- Checks if a file is a link to a key. -}
|
|
isAnnexLink :: FilePath -> Annex (Maybe Key)
|
|
isAnnexLink file = maybe Nothing (fileKey . takeFileName) <$> getAnnexLinkTarget file
|
|
|
|
{- Gets the link target of a symlink.
|
|
-
|
|
- On a filesystem that does not support symlinks, fall back to getting the
|
|
- link target by looking inside the file.
|
|
-
|
|
- Returns Nothing if the file is not a symlink, or not a link to annex
|
|
- content.
|
|
-}
|
|
getAnnexLinkTarget :: FilePath -> Annex (Maybe LinkTarget)
|
|
getAnnexLinkTarget f = getAnnexLinkTarget' f
|
|
=<< (coreSymlinks <$> Annex.getGitConfig)
|
|
|
|
{- Pass False to force looking inside file. -}
|
|
getAnnexLinkTarget' :: FilePath -> Bool -> Annex (Maybe LinkTarget)
|
|
getAnnexLinkTarget' file coresymlinks = if coresymlinks
|
|
then check readSymbolicLink $
|
|
return Nothing
|
|
else check readSymbolicLink $
|
|
check probefilecontent $
|
|
return Nothing
|
|
where
|
|
check getlinktarget fallback = do
|
|
v <- liftIO $ catchMaybeIO $ getlinktarget file
|
|
case v of
|
|
Just l
|
|
| isLinkToAnnex (fromInternalGitPath l) -> return v
|
|
| otherwise -> return Nothing
|
|
Nothing -> fallback
|
|
|
|
probefilecontent f = withFile f ReadMode $ \h -> do
|
|
fileEncoding h
|
|
-- The first 8k is more than enough to read; link
|
|
-- files are small.
|
|
s <- take 8192 <$> hGetContents h
|
|
-- If we got the full 8k, the file is too large
|
|
if length s == 8192
|
|
then return ""
|
|
else
|
|
-- If there are any NUL or newline
|
|
-- characters, or whitespace, we
|
|
-- certianly don't have a link to a
|
|
-- git-annex key.
|
|
return $ if any (`elem` s) "\0\n\r \t"
|
|
then ""
|
|
else s
|
|
|
|
makeAnnexLink :: LinkTarget -> FilePath -> Annex ()
|
|
makeAnnexLink = makeGitLink
|
|
|
|
{- Creates a link on disk.
|
|
-
|
|
- On a filesystem that does not support symlinks, writes the link target
|
|
- to a file. Note that git will only treat the file as a symlink if
|
|
- it's staged as such, so use addAnnexLink when adding a new file or
|
|
- modified link to git.
|
|
-}
|
|
makeGitLink :: LinkTarget -> FilePath -> Annex ()
|
|
makeGitLink linktarget file = ifM (coreSymlinks <$> Annex.getGitConfig)
|
|
( liftIO $ do
|
|
void $ tryIO $ removeFile file
|
|
createSymbolicLink linktarget file
|
|
, liftIO $ writeFile file linktarget
|
|
)
|
|
|
|
{- Creates a link on disk, and additionally stages it in git. -}
|
|
addAnnexLink :: LinkTarget -> FilePath -> Annex ()
|
|
addAnnexLink linktarget file = do
|
|
makeAnnexLink linktarget file
|
|
stageSymlink file =<< hashSymlink linktarget
|
|
|
|
{- Injects a symlink target into git, returning its Sha. -}
|
|
hashSymlink :: LinkTarget -> Annex Sha
|
|
hashSymlink linktarget = hashBlob (toInternalGitPath linktarget)
|
|
|
|
{- Stages a symlink to an annexed object, using a Sha of its target. -}
|
|
stageSymlink :: FilePath -> Sha -> Annex ()
|
|
stageSymlink file sha =
|
|
Annex.Queue.addUpdateIndex =<<
|
|
inRepo (Git.UpdateIndex.stageSymlink file sha)
|
|
|
|
{- Injects a pointer file content into git, returning its Sha. -}
|
|
hashPointerFile :: Key -> Annex Sha
|
|
hashPointerFile key = hashBlob (formatPointer key)
|
|
|
|
{- Stages a pointer file, using a Sha of its content -}
|
|
stagePointerFile :: FilePath -> Maybe FileMode -> Sha -> Annex ()
|
|
stagePointerFile file mode sha =
|
|
Annex.Queue.addUpdateIndex =<<
|
|
inRepo (Git.UpdateIndex.stageFile sha blobtype file)
|
|
where
|
|
blobtype
|
|
| maybe False isExecutable mode = ExecutableBlob
|
|
| otherwise = FileBlob
|
|
|
|
writePointerFile :: FilePath -> Key -> Maybe FileMode -> IO ()
|
|
writePointerFile file k mode = do
|
|
writeFile file (formatPointer k)
|
|
maybe noop (setFileMode file) mode
|
|
|
|
{- Parses a symlink target or a pointer file to a Key.
|
|
- Only looks at the first line, as pointer files can have subsequent
|
|
- lines. -}
|
|
parseLinkOrPointer :: L.ByteString -> Maybe Key
|
|
parseLinkOrPointer = parseLinkOrPointer'
|
|
. decodeBS . L.take (fromIntegral maxPointerSz)
|
|
where
|
|
|
|
{- Want to avoid buffering really big files in git into
|
|
- memory when reading files that may be pointers.
|
|
-
|
|
- 8192 bytes is plenty for a pointer to a key.
|
|
- Pad some more to allow for any pointer files that might have
|
|
- lines after the key explaining what the file is used for. -}
|
|
maxPointerSz :: Integer
|
|
maxPointerSz = 81920
|
|
|
|
parseLinkOrPointer' :: String -> Maybe Key
|
|
parseLinkOrPointer' = go . fromInternalGitPath . takeWhile (not . lineend)
|
|
where
|
|
go l
|
|
| isLinkToAnnex l = fileKey $ takeFileName l
|
|
| otherwise = Nothing
|
|
lineend '\n' = True
|
|
lineend '\r' = True
|
|
lineend _ = False
|
|
|
|
formatPointer :: Key -> String
|
|
formatPointer k =
|
|
toInternalGitPath (pathSeparator:objectDir </> keyFile k) ++ "\n"
|
|
|
|
{- Checks if a worktree file is a pointer to a key.
|
|
-
|
|
- Unlocked files whose content is present are not detected by this. -}
|
|
isPointerFile :: FilePath -> IO (Maybe Key)
|
|
isPointerFile f = catchDefaultIO Nothing $ bracket open close $ \h -> do
|
|
b <- take (fromIntegral maxPointerSz) <$> hGetContents h
|
|
-- strict so it reads before the file handle is closed
|
|
let !mk = parseLinkOrPointer' b
|
|
return mk
|
|
where
|
|
open = openBinaryFile f ReadMode
|
|
close = hClose
|
|
|
|
{- Checks a symlink target or pointer file first line to see if it
|
|
- appears to point to annexed content.
|
|
-
|
|
- We only look for paths inside the .git directory, and not at the .git
|
|
- directory itself, because GIT_DIR may cause a directory name other
|
|
- than .git to be used.
|
|
-}
|
|
isLinkToAnnex :: FilePath -> Bool
|
|
isLinkToAnnex s = (pathSeparator:objectDir) `isInfixOf` s
|
|
#ifdef mingw32_HOST_OS
|
|
-- '/' is still used inside pointer files on Windows, not the native
|
|
-- '\'
|
|
|| ('/':objectDir) `isInfixOf` s
|
|
#endif
|