{- git-annex links to content - - On file systems that support them, symlinks are used. - - On other filesystems, git instead stores the symlink target in a regular - file. - - Pointer files are used instead of symlinks for unlocked files. - - Copyright 2013-2015 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} {-# LANGUAGE CPP, BangPatterns #-} module Annex.Link where import Annex.Common import qualified Annex import qualified Git.UpdateIndex import qualified Annex.Queue import Git.Types import Git.FilePath import Annex.HashObject import Utility.FileMode import Utility.FileSystemEncoding import qualified Data.ByteString.Lazy as L type LinkTarget = String {- Checks if a file is a link to a key. -} isAnnexLink :: FilePath -> Annex (Maybe Key) isAnnexLink file = maybe Nothing (fileKey . takeFileName) <$> getAnnexLinkTarget file {- Gets the link target of a symlink. - - On a filesystem that does not support symlinks, fall back to getting the - link target by looking inside the file. - - Returns Nothing if the file is not a symlink, or not a link to annex - content. -} getAnnexLinkTarget :: FilePath -> Annex (Maybe LinkTarget) getAnnexLinkTarget f = getAnnexLinkTarget' f =<< (coreSymlinks <$> Annex.getGitConfig) {- Pass False to force looking inside file. -} getAnnexLinkTarget' :: FilePath -> Bool -> Annex (Maybe LinkTarget) getAnnexLinkTarget' file coresymlinks = if coresymlinks then check readSymbolicLink $ return Nothing else check readSymbolicLink $ check probefilecontent $ return Nothing where check getlinktarget fallback = liftIO (catchMaybeIO $ getlinktarget file) >>= \case Just l | isLinkToAnnex (fromInternalGitPath l) -> return (Just l) | otherwise -> return Nothing Nothing -> fallback probefilecontent f = withFile f ReadMode $ \h -> do -- The first 8k is more than enough to read; link -- files are small. s <- take 8192 <$> hGetContents h -- If we got the full 8k, the file is too large if length s == 8192 then return "" else -- If there are any NUL or newline -- characters, or whitespace, we -- certianly don't have a link to a -- git-annex key. return $ if any (`elem` s) "\0\n\r \t" then "" else s makeAnnexLink :: LinkTarget -> FilePath -> Annex () makeAnnexLink = makeGitLink {- Creates a link on disk. - - On a filesystem that does not support symlinks, writes the link target - to a file. Note that git will only treat the file as a symlink if - it's staged as such, so use addAnnexLink when adding a new file or - modified link to git. -} makeGitLink :: LinkTarget -> FilePath -> Annex () makeGitLink linktarget file = ifM (coreSymlinks <$> Annex.getGitConfig) ( liftIO $ do void $ tryIO $ removeFile file createSymbolicLink linktarget file , liftIO $ writeFile file linktarget ) {- Creates a link on disk, and additionally stages it in git. -} addAnnexLink :: LinkTarget -> FilePath -> Annex () addAnnexLink linktarget file = do makeAnnexLink linktarget file stageSymlink file =<< hashSymlink linktarget {- Injects a symlink target into git, returning its Sha. -} hashSymlink :: LinkTarget -> Annex Sha hashSymlink linktarget = hashBlob (toInternalGitPath linktarget) {- Stages a symlink to an annexed object, using a Sha of its target. -} stageSymlink :: FilePath -> Sha -> Annex () stageSymlink file sha = Annex.Queue.addUpdateIndex =<< inRepo (Git.UpdateIndex.stageSymlink file sha) {- Injects a pointer file content into git, returning its Sha. -} hashPointerFile :: Key -> Annex Sha hashPointerFile key = hashBlob (formatPointer key) {- Stages a pointer file, using a Sha of its content -} stagePointerFile :: FilePath -> Maybe FileMode -> Sha -> Annex () stagePointerFile file mode sha = Annex.Queue.addUpdateIndex =<< inRepo (Git.UpdateIndex.stageFile sha treeitemtype file) where treeitemtype | maybe False isExecutable mode = TreeExecutable | otherwise = TreeFile writePointerFile :: FilePath -> Key -> Maybe FileMode -> IO () writePointerFile file k mode = do writeFile file (formatPointer k) maybe noop (setFileMode file) mode {- Parses a symlink target or a pointer file to a Key. - Only looks at the first line, as pointer files can have subsequent - lines. -} parseLinkOrPointer :: L.ByteString -> Maybe Key parseLinkOrPointer = parseLinkOrPointer' . decodeBS . L.take (fromIntegral maxPointerSz) where {- Want to avoid buffering really big files in git into - memory when reading files that may be pointers. - - 8192 bytes is plenty for a pointer to a key. - Pad some more to allow for any pointer files that might have - lines after the key explaining what the file is used for. -} maxPointerSz :: Integer maxPointerSz = 81920 parseLinkOrPointer' :: String -> Maybe Key parseLinkOrPointer' = go . fromInternalGitPath . takeWhile (not . lineend) where go l | isLinkToAnnex l = fileKey $ takeFileName l | otherwise = Nothing lineend '\n' = True lineend '\r' = True lineend _ = False formatPointer :: Key -> String formatPointer k = toInternalGitPath (pathSeparator:objectDir keyFile k) ++ "\n" {- Checks if a worktree file is a pointer to a key. - - Unlocked files whose content is present are not detected by this. -} isPointerFile :: FilePath -> IO (Maybe Key) isPointerFile f = catchDefaultIO Nothing $ bracket open close $ \h -> do b <- take (fromIntegral maxPointerSz) <$> hGetContents h -- strict so it reads before the file handle is closed let !mk = parseLinkOrPointer' b return mk where open = openBinaryFile f ReadMode close = hClose {- Checks a symlink target or pointer file first line to see if it - appears to point to annexed content. - - We only look for paths inside the .git directory, and not at the .git - directory itself, because GIT_DIR may cause a directory name other - than .git to be used. -} isLinkToAnnex :: FilePath -> Bool isLinkToAnnex s = (pathSeparator:objectDir) `isInfixOf` s #ifdef mingw32_HOST_OS -- '/' is still used inside pointer files on Windows, not the native -- '\' || ('/':objectDir) `isInfixOf` s #endif