From d7c93b8913ab653317b7c169b75adf418284de71 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 17 Feb 2013 15:05:55 -0400 Subject: [PATCH] fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!! --- Annex/Direct.hs | 24 ++++------- Annex/Link.hs | 74 ++++++++++++++++++++++++++++++++++ Assistant/Threads/Committer.hs | 8 +--- Assistant/Threads/Watcher.hs | 20 ++------- Backend.hs | 41 ++----------------- Command/Add.hs | 48 +++++++++++++--------- Command/Fsck.hs | 11 +++-- Command/Indirect.hs | 11 +++-- Command/Sync.hs | 7 ++-- debian/changelog | 7 ++++ 10 files changed, 142 insertions(+), 109 deletions(-) create mode 100644 Annex/Link.hs diff --git a/Annex/Direct.hs b/Annex/Direct.hs index a4839d509f..596997652e 100644 --- a/Annex/Direct.hs +++ b/Annex/Direct.hs @@ -10,8 +10,6 @@ module Annex.Direct where import Common.Annex import qualified Git import qualified Git.LsFiles -import qualified Git.UpdateIndex -import qualified Git.HashObject import qualified Git.Merge import qualified Git.DiffTree as DiffTree import Git.Sha @@ -24,6 +22,7 @@ import Backend import Types.KeySource import Annex.Content import Annex.Content.Direct +import Annex.Link import Utility.InodeCache import Utility.CopyFile @@ -88,10 +87,7 @@ addDirect file cache = do return False got (Just (key, _)) = ifM (liftIO $ compareInodeCache file $ Just cache) ( do - link <- calcGitLink file key - sha <- inRepo $ Git.HashObject.hashObject BlobObject link - Annex.Queue.addUpdateIndex =<< - inRepo (Git.UpdateIndex.stageSymlink file sha) + stageSymlink file =<< hashSymlink =<< calcGitLink file key writeInodeCache key cache void $ addAssociatedFile key file logStatus key InfoPresent @@ -155,8 +151,8 @@ mergeDirectCleanup d oldsha newsha = do - Symlinks are replaced with their content, if it's available. -} movein k f = do l <- calcGitLink f k - replaceFile f $ const $ - liftIO $ createSymbolicLink l f + replaceFile f $ + makeAnnexLink l toDirect k f {- Any new, modified, or renamed files were written to the temp @@ -185,7 +181,7 @@ toDirectGen k f = do liftIO . moveFile loc , return Nothing ) - (loc':_) -> ifM (liftIO $ catchBoolIO $ not . isSymbolicLink <$> getSymbolicLinkStatus loc') + (loc':_) -> ifM (not . isJust <$> getAnnexLinkTarget loc') {- Another direct file has the content; copy it. -} ( return $ Just $ replaceFile f $ @@ -197,13 +193,9 @@ toDirectGen k f = do removeDirect :: Key -> FilePath -> Annex () removeDirect k f = do locs <- removeAssociatedFile k f - when (null locs) $ do - r <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus f - case r of - Just s - | not (isSymbolicLink s) -> - moveAnnex k f - _ -> noop + when (null locs) $ + whenM (not . isJust <$> getAnnexLinkTarget f) $ + moveAnnex k f liftIO $ do nukeFile f void $ tryIO $ removeDirectory $ parentDir f diff --git a/Annex/Link.hs b/Annex/Link.hs new file mode 100644 index 0000000000..f35c069eed --- /dev/null +++ b/Annex/Link.hs @@ -0,0 +1,74 @@ +{- git-annex links to content + - + - On file systems that support them, symlinks are used. + - + - On other filesystems, git instead stores the symlink target in a regular + - file. + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Link where + +import Common.Annex +import qualified Annex +import qualified Git.HashObject +import qualified Git.UpdateIndex +import qualified Annex.Queue +import Git.Types + +{- Checks if a file is a link to a key. -} +isAnnexLink :: FilePath -> Annex (Maybe Key) +isAnnexLink file = maybe Nothing (fileKey . takeFileName) <$> getAnnexLinkTarget file + +{- Gets the link target of a symlink. + - + - On a filesystem that does not support symlinks, get the link + - target by looking inside the file. (Only return at first 8k of the file, + - more than enough for any symlink target.) + - + - Returns Nothing if the file is not a symlink, or not a link to annex + - content. + -} +getAnnexLinkTarget :: FilePath -> Annex (Maybe String) +getAnnexLinkTarget file = do + v <- ifM (coreSymlinks <$> Annex.getGitConfig) + ( liftIO $ catchMaybeIO $ readSymbolicLink file + , liftIO $ catchMaybeIO $ take 8192 <$> readFile file + ) + case v of + Nothing -> return Nothing + Just l + | isLinkToAnnex l -> return v + | otherwise -> return Nothing + +{- Creates a link on disk. + - + - On a filesystem that does not support symlinks, writes the link target + - to a file. Note that git will only treat the file as a symlink if + - it's staged as such, so use addAnnexLink when adding a new file or + - modified link to git. + -} +makeAnnexLink :: String -> FilePath -> Annex () +makeAnnexLink linktarget file = ifM (coreSymlinks <$> Annex.getGitConfig) + ( liftIO $ createSymbolicLink linktarget file + , liftIO $ writeFile file linktarget + ) + +{- Creates a link on disk, and additionally stages it in git. -} +addAnnexLink :: String -> FilePath -> Annex () +addAnnexLink linktarget file = do + makeAnnexLink linktarget file + stageSymlink file =<< hashSymlink linktarget + +{- Injects a symlink target into git, returning its Sha. -} +hashSymlink :: String -> Annex Sha +hashSymlink linktarget = inRepo $ Git.HashObject.hashObject BlobObject linktarget + +{- Stages a symlink to the annex, using a Sha of its target. -} +stageSymlink :: FilePath -> Sha -> Annex () +stageSymlink file sha = + Annex.Queue.addUpdateIndex =<< + inRepo (Git.UpdateIndex.stageSymlink file sha) diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index ba8d595f60..b40b8ad285 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -15,16 +15,13 @@ import Assistant.Types.Changes import Assistant.Commits import Assistant.Alert import Assistant.DaemonStatus -import Assistant.Threads.Watcher import Assistant.TransferQueue import Logs.Transfer import Logs.Location import qualified Annex.Queue import qualified Git.Command -import qualified Git.HashObject import qualified Git.LsFiles import qualified Git.Version -import Git.Types import qualified Command.Add import Utility.ThreadScheduler import qualified Utility.Lsof as Lsof @@ -33,6 +30,7 @@ import Types.KeySource import Config import Annex.Exception import Annex.Content +import Annex.Link import qualified Annex import Data.Time.Clock @@ -216,9 +214,7 @@ handleAdds delayadd cs = returnWhen (null incomplete) $ do , Command.Add.link file key True ) whenM (pure DirWatcher.eventsCoalesce <||> isDirect) $ do - sha <- inRepo $ - Git.HashObject.hashObject BlobObject link - stageSymlink file sha + stageSymlink file =<< hashSymlink link showEndOk queueTransfers Next key (Just file) Upload return $ Just change diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs index 38116cdc6d..8c77fa64a8 100644 --- a/Assistant/Threads/Watcher.hs +++ b/Assistant/Threads/Watcher.hs @@ -12,7 +12,6 @@ module Assistant.Threads.Watcher ( WatcherException(..), checkCanWatch, needLsof, - stageSymlink, onAddSymlink, runHandler, ) where @@ -32,13 +31,13 @@ import qualified Annex import qualified Annex.Queue import qualified Git import qualified Git.UpdateIndex -import qualified Git.HashObject import qualified Git.LsFiles as LsFiles import qualified Backend import Annex.Content import Annex.Direct import Annex.Content.Direct import Annex.CatFile +import Annex.Link import Git.Types import Config import Utility.ThreadScheduler @@ -206,7 +205,7 @@ onAddSymlink file filestatus = go =<< liftAnnex (Backend.lookupFile file) ensurestaged (Just link) s , do liftIO $ removeFile file - liftIO $ createSymbolicLink link file + liftAnnex $ Backend.makeAnnexLink link file checkcontent key =<< getDaemonStatus addlink link ) @@ -242,10 +241,7 @@ onAddSymlink file filestatus = go =<< liftAnnex (Backend.lookupFile file) Just (currlink, sha) | s2w8 link == L.unpack currlink -> stageSymlink file sha - _ -> do - sha <- inRepo $ - Git.HashObject.hashObject BlobObject link - stageSymlink file sha + _ -> stageSymlink file =<< hashSymlink link madeChange file LinkChange {- When a new link appears, or a link is changed, after the startup @@ -289,13 +285,3 @@ onErr msg _ = do liftAnnex $ warning msg void $ addAlert $ warningAlert "watcher" msg noChange - -{- Adds a symlink to the index, without ever accessing the actual symlink - - on disk. This avoids a race if git add is used, where the symlink is - - changed to something else immediately after creation. It also allows - - direct mode to work. - -} -stageSymlink :: FilePath -> Sha -> Annex () -stageSymlink file sha = - Annex.Queue.addUpdateIndex =<< - inRepo (Git.UpdateIndex.stageSymlink file sha) diff --git a/Backend.hs b/Backend.hs index 076f7c2cee..6bbf3f75e1 100644 --- a/Backend.hs +++ b/Backend.hs @@ -17,12 +17,11 @@ module Backend ( maybeLookupBackendName ) where -import System.Posix.Files - import Common.Annex import qualified Annex import Annex.CheckAttr import Annex.CatFile +import Annex.Link import Types.Key import Types.KeySource import qualified Types.Backend as B @@ -77,15 +76,12 @@ genKey' (b:bs) source = do | otherwise = c {- Looks up the key and backend corresponding to an annexed file, - - by examining what the file symlinks to. + - by examining what the file links to. - - - In direct mode, there is often no symlink on disk, in which case - - the symlink is looked up in git instead. However, a real symlink + - In direct mode, there is often no link on disk, in which case + - the symlink is looked up in git instead. However, a real link - on disk still takes precedence over what was committed to git in direct - mode. - - - - On a filesystem that does not support symlinks, git will instead store - - the symlink target in a regular file. -} lookupFile :: FilePath -> Annex (Maybe (Key, Backend)) lookupFile file = do @@ -107,35 +103,6 @@ lookupFile file = do " (unknown backend " ++ bname ++ ")" return Nothing -{- Checks if a file is a symlink to a key. - - - - On a filesystem that does not support symlinks, git will instead store - - the symlink target in a regular file. (Only look at first 8k of file, - - more than enough for any symlink target.) - -} -isAnnexLink :: FilePath -> Annex (Maybe Key) -isAnnexLink file = maybe Nothing makekey <$> gettarget - where - gettarget = ifM (coreSymlinks <$> Annex.getGitConfig) - ( liftIO $ catchMaybeIO $ readSymbolicLink file - , liftIO $ catchMaybeIO $ take 8192 <$> readFile file - ) - makekey l - | isLinkToAnnex l = fileKey $ takeFileName l - | otherwise = Nothing - -{- Creates a symlink on disk. - - - - On a filesystem that does not support symlinks, writes the link target - - to a file. Note that git will only treat the file as a symlink if - - it's staged as such. - -} -makeAnnexLink :: String -> FilePath -> Annex () -makeAnnexLink linktarget file = ifM (coreSymlinks <$> Annex.getGitConfig) - ( liftIO $ createSymbolicLink linktarget file - , liftIO $ writeFile file linktarget - ) - {- Looks up the backend that should be used for a file. - That can be configured on a per-file basis in the gitattributes file. -} chooseBackend :: FilePath -> Annex (Maybe Backend) diff --git a/Command/Add.hs b/Command/Add.hs index 9cd5ec87b5..7ebf979cd1 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -12,22 +12,20 @@ module Command.Add where import Common.Annex import Annex.Exception import Command -import qualified Annex -import qualified Annex.Queue import Types.KeySource import Backend import Logs.Location import Annex.Content import Annex.Content.Direct import Annex.Perms +import Annex.Link +import qualified Annex +import qualified Annex.Queue #ifndef WITH_ANDROID import Utility.Touch #endif import Utility.FileMode import Config -import qualified Git.HashObject -import qualified Git.UpdateIndex -import Git.Types import Utility.InodeCache def :: [Command] @@ -159,7 +157,7 @@ undo file key e = do link :: FilePath -> Key -> Bool -> Annex String link file key hascontent = handle (undo file key) $ do l <- calcGitLink file key - liftIO $ createSymbolicLink l file + makeAnnexLink l file #ifndef WITH_ANDROID when hascontent $ do @@ -173,23 +171,35 @@ link file key hascontent = handle (undo file key) $ do return l {- Note: Several other commands call this, and expect it to - - create the symlink and add it. -} + - create the link and add it. + - + - In direct mode, when we have the content of the file, it's left as-is, + - and we just stage a symlink to git. + - + - Otherwise, as long as the filesystem supports symlinks, we use + - git add, rather than directly staging the symlink to git. + - Using git add is best because it allows the queuing to work + - and is faster (staging the symlink runs hash-object commands each time). + - Also, using git add allows it to skip gitignored files, unless forced + - to include them. + -} cleanup :: FilePath -> Key -> Bool -> CommandCleanup cleanup file key hascontent = do when hascontent $ logStatus key InfoPresent ifM (isDirect <&&> pure hascontent) - ( do - l <- calcGitLink file key - sha <- inRepo $ Git.HashObject.hashObject BlobObject l - Annex.Queue.addUpdateIndex =<< - inRepo (Git.UpdateIndex.stageSymlink file sha) - , do - _ <- link file key hascontent - params <- ifM (Annex.getState Annex.force) - ( return [Param "-f"] - , return [] - ) - Annex.Queue.addCommand "add" (params++[Param "--"]) [file] + ( stageSymlink file =<< hashSymlink =<< calcGitLink file key + , ifM (coreSymlinks <$> Annex.getGitConfig) + ( do + _ <- link file key hascontent + params <- ifM (Annex.getState Annex.force) + ( return [Param "-f"] + , return [] + ) + Annex.Queue.addCommand "add" (params++[Param "--"]) [file] + , do + l <- link file key hascontent + addAnnexLink l file + ) ) return True diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 6662455171..b662ee5787 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -10,7 +10,6 @@ module Command.Fsck where import Common.Annex import Command import qualified Annex -import qualified Annex.Queue import qualified Remote import qualified Types.Backend import qualified Types.Key @@ -18,6 +17,7 @@ import qualified Backend import Annex.Content import Annex.Content.Direct import Annex.Perms +import Annex.Link import Logs.Location import Logs.Trust import Annex.UUID @@ -182,14 +182,14 @@ performBare key backend = check check :: [Annex Bool] -> Annex Bool check cs = all id <$> sequence cs -{- Checks that the file's symlink points correctly to the content. +{- Checks that the file's link points correctly to the content. - - - In direct mode, there is only a symlink when the content is not present. + - In direct mode, there is only a link when the content is not present. -} fixLink :: Key -> FilePath -> Annex Bool fixLink key file = do want <- calcGitLink file key - have <- liftIO $ catchMaybeIO $ readSymbolicLink file + have <- getAnnexLinkTarget file maybe noop (go want) have return True where @@ -210,8 +210,7 @@ fixLink key file = do showNote "fixing link" liftIO $ createDirectoryIfMissing True (parentDir file) liftIO $ removeFile file - liftIO $ createSymbolicLink want file - Annex.Queue.addCommand "add" [Param "--force", Param "--"] [file] + addAnnexLink want file {- Checks that the location log reflects the current status of the key, - in this repository only. -} diff --git a/Command/Indirect.hs b/Command/Indirect.hs index 90e0b6eafb..ac97be753f 100644 --- a/Command/Indirect.hs +++ b/Command/Indirect.hs @@ -13,6 +13,7 @@ import qualified Git import qualified Git.Command import qualified Git.LsFiles import Config +import qualified Annex import Annex.Direct import Annex.Content import Annex.CatFile @@ -27,10 +28,12 @@ seek = [withNothing start] start :: CommandStart start = ifM isDirect - ( ifM probeCrippledFileSystem - ( error "This repository seems to be on a crippled filesystem, you must use direct mode." - , next perform - ) + ( do + unlessM (coreSymlinks <$> Annex.getGitConfig) $ + error "Git is configured to not use symlinks, so you must use direct mode." + whenM probeCrippledFileSystem $ + error "This repository seems to be on a crippled filesystem, you must use direct mode." + next perform , stop ) diff --git a/Command/Sync.hs b/Command/Sync.hs index 6d3a766598..cd0398ffa8 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -17,6 +17,7 @@ import qualified Annex.Queue import Annex.Content import Annex.Direct import Annex.CatFile +import Annex.Link import qualified Git.Command import qualified Git.LsFiles as LsFiles import qualified Git.Merge @@ -263,10 +264,8 @@ resolveMerge' u makelink (Just key) = do let dest = mergeFile file key l <- calcGitLink dest key - liftIO $ do - nukeFile dest - createSymbolicLink l dest - Annex.Queue.addCommand "add" [Param "--force", Param "--"] [dest] + liftIO $ nukeFile dest + addAnnexLink l dest whenM (isDirect) $ toDirect key dest makelink _ = noop diff --git a/debian/changelog b/debian/changelog index fddf13e047..60d192e55c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +git-annex (3.20130217) UNRELEASED; urgency=low + + * Should now fully support git repositories with core.symlinks=false; + always using git's pseudosymlink files in such repositories. + + -- Joey Hess Sun, 17 Feb 2013 16:42:16 -0400 + git-annex (3.20130216) unstable; urgency=low * Now uses the Haskell uuid library, rather than needing a uuid program.