git-annex/Annex/Direct.hs

482 lines
16 KiB
Haskell
Raw Normal View History

2012-12-12 23:20:38 +00:00
{- git-annex direct mode
-
- This is deprecated, and will be removed when direct mode gets removed
- from git-annex.
2012-12-12 23:20:38 +00:00
-
- Copyright 2012-2014 Joey Hess <id@joeyh.name>
2012-12-12 23:20:38 +00:00
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.Direct where
import Annex.Common
import qualified Annex
2012-12-12 23:20:38 +00:00
import qualified Git
import qualified Git.LsFiles
import qualified Git.Merge
import qualified Git.DiffTree as DiffTree
import qualified Git.Config
import qualified Git.Ref
import qualified Git.Branch
import Git.Sha
import Git.FilePath
2012-12-12 23:20:38 +00:00
import Git.Types
import Config
import Annex.CatFile
import qualified Annex.Queue
2012-12-12 23:20:38 +00:00
import Logs.Location
import Backend
import Types.KeySource
import Annex.Content
import Annex.Content.Direct
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
import Annex.Link
2013-02-14 20:17:40 +00:00
import Utility.InodeCache
import Utility.CopyFile
import Annex.Perms
import Annex.ReplaceFile
import Annex.VariantFile
import Git.Index
import Annex.GitOverlay
2014-07-10 04:32:23 +00:00
import Annex.LockFile
import Annex.InodeSentinal
2012-12-12 23:20:38 +00:00
{- Uses git ls-files to find files that need to be committed, and stages
- them into the index. Returns True if some changes were staged. -}
stageDirect :: Annex Bool
stageDirect = do
Annex.Queue.flush
top <- fromRepo Git.repoPath
(l, cleanup) <- inRepo $ Git.LsFiles.stagedOthersDetails [top]
2012-12-12 23:20:38 +00:00
forM_ l go
void $ liftIO cleanup
staged <- Annex.Queue.size
Annex.Queue.flush
return $ staged /= 0
where
{- Determine what kind of modified or deleted file this is, as
- efficiently as we can, by getting any key that's associated
- with it in git, as well as its stat info. -}
go (file, Just sha, Just _mode) = withTSDelta $ \delta -> do
shakey <- catKey sha
2012-12-12 23:20:38 +00:00
mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file
mcache <- liftIO $ maybe (pure Nothing) (toInodeCache delta file) mstat
filekey <- isAnnexLink file
case (shakey, filekey, mstat, mcache) of
(_, Just key, _, _)
| shakey == filekey -> noop
{- A changed symlink. -}
| otherwise -> stageannexlink file key
(Just key, _, _, Just cache) -> do
2012-12-12 23:20:38 +00:00
{- All direct mode files will show as
- modified, so compare the cache to see if
- it really was. -}
2013-02-14 20:17:40 +00:00
oldcache <- recordedInodeCache key
case oldcache of
[] -> modifiedannexed file key cache
_ -> unlessM (elemInodeCaches cache oldcache) $
modifiedannexed file key cache
(Just key, _, Nothing, _) -> deletedannexed file key
(Nothing, _, Nothing, _) -> deletegit file
(_, _, Just _, _) -> addgit file
go _ = noop
2012-12-12 23:20:38 +00:00
modifiedannexed file oldkey cache = do
void $ removeAssociatedFile oldkey file
void $ addDirect file cache
deletedannexed file key = do
void $ removeAssociatedFile key file
deletegit file
stageannexlink file key = do
l <- calcRepo $ gitAnnexLink file key
stageSymlink file =<< hashSymlink l
void $ addAssociatedFile key file
2012-12-12 23:20:38 +00:00
addgit file = Annex.Queue.addCommand "add" [Param "-f"] [file]
deletegit file = Annex.Queue.addCommand "rm" [Param "-qf"] [file]
{- Run before a commit to update direct mode bookeeping to reflect the
- staged changes being committed. -}
preCommitDirect :: Annex Bool
preCommitDirect = do
(diffs, clean) <- inRepo $ DiffTree.diffIndex Git.Ref.headRef
makeabs <- flip fromTopFilePath <$> gitRepo
forM_ diffs (go makeabs)
liftIO clean
where
go makeabs diff = do
withkey (DiffTree.srcsha diff) (DiffTree.srcmode diff) removeAssociatedFile
withkey (DiffTree.dstsha diff) (DiffTree.dstmode diff) addAssociatedFile
where
withkey sha _mode a = when (sha /= nullSha) $ do
k <- catKey sha
case k of
Nothing -> noop
Just key -> void $ a key $
makeabs $ DiffTree.file diff
2012-12-12 23:20:38 +00:00
{- Adds a file to the annex in direct mode. Can fail, if the file is
- modified or deleted while it's being added. -}
2013-02-14 20:17:40 +00:00
addDirect :: FilePath -> InodeCache -> Annex Bool
2012-12-12 23:20:38 +00:00
addDirect file cache = do
showStart "add" file
let source = KeySource
{ keyFilename = file
, contentLocation = file
, inodeCache = Just cache
2012-12-12 23:20:38 +00:00
}
got =<< genKey source =<< chooseBackend file
where
got Nothing = do
showEndFail
return False
got (Just (key, _)) = ifM (sameInodeCache file [cache])
2012-12-12 23:20:38 +00:00
( do
l <- calcRepo $ gitAnnexLink file key
stageSymlink file =<< hashSymlink l
addInodeCache key cache
2012-12-12 23:20:38 +00:00
void $ addAssociatedFile key file
logStatus key InfoPresent
showEndOk
return True
, do
showEndFail
return False
)
{- In direct mode, git merge would usually refuse to do anything, since it
- sees present direct mode files as type changed files.
-
- So, to handle a merge, it's run with the work tree set to a temp
- directory, and the merge is staged into a copy of the index.
- Then the work tree is updated to reflect the merge, and
- finally, the merge is committed and the real index updated.
-
- A lock file is used to avoid races with any other caller of mergeDirect.
-
- To avoid other git processes from making changes to the index while our
- merge is in progress, the index lock file is used as the temp index
- file. This is the same as what git does when updating the index
- normally.
-}
mergeDirect :: Maybe Git.Ref -> Maybe Git.Ref -> Git.Branch -> Annex Bool -> [Git.Merge.MergeConfig] -> Git.Branch.CommitMode -> Annex Bool
mergeDirect startbranch oldref branch resolvemerge mergeconfig commitmode = exclusively $ do
reali <- liftIO . absPath =<< fromRepo indexFile
tmpi <- liftIO . absPath =<< fromRepo indexFileLock
liftIO $ whenM (doesFileExist reali) $
copyFile reali tmpi
d <- fromRepo gitAnnexMergeDir
liftIO $ do
whenM (doesDirectoryExist d) $
removeDirectoryRecursive d
createDirectoryIfMissing True d
withIndexFile tmpi $ do
merged <- stageMerge d branch mergeconfig commitmode
ok <- if merged
then return True
else resolvemerge
if ok
then do
mergeDirectCleanup d (fromMaybe Git.Sha.emptyTree oldref)
mergeDirectCommit merged startbranch branch commitmode
liftIO $ whenM (doesFileExist tmpi) $
rename tmpi reali
2015-10-15 18:27:14 +00:00
else do
liftIO $ nukeFile tmpi
liftIO $ removeDirectoryRecursive d
return ok
where
2014-07-10 04:32:23 +00:00
exclusively = withExclusiveLock gitAnnexMergeLock
{- Stage a merge into the index, avoiding changing HEAD or the current
- branch. -}
stageMerge :: FilePath -> Git.Branch -> [Git.Merge.MergeConfig] -> Git.Branch.CommitMode -> Annex Bool
stageMerge d branch mergeconfig commitmode = do
-- XXX A bug in git makes stageMerge unsafe to use if the git repo
-- is configured with core.symlinks=false
-- Using merge is not ideal though, since it will
-- update the current branch immediately, before the work tree
-- has been updated, which would leave things in an inconsistent
-- state if mergeDirectCleanup is interrupted.
-- <http://marc.info/?l=git&m=140262402204212&w=2>
merger <- ifM (coreSymlinks <$> Annex.getGitConfig)
( return $ \ref -> Git.Merge.stageMerge ref mergeconfig
, return $ \ref -> Git.Merge.merge ref mergeconfig commitmode
)
inRepo $ \g -> do
wd <- liftIO $ absPath d
gd <- liftIO $ absPath $ Git.localGitDir g
merger branch $
g { location = Local { gitdir = gd, worktree = Just (addTrailingPathSeparator wd) } }
{- Commits after a direct mode merge is complete, and after the work
- tree has been updated by mergeDirectCleanup.
-}
mergeDirectCommit :: Bool -> Maybe Git.Ref -> Git.Branch -> Git.Branch.CommitMode -> Annex ()
mergeDirectCommit allowff old branch commitmode = do
void preCommitDirect
d <- fromRepo Git.localGitDir
let merge_head = d </> "MERGE_HEAD"
let merge_msg = d </> "MERGE_MSG"
let merge_mode = d </> "MERGE_MODE"
ifM (pure allowff <&&> canff)
2016-03-31 16:27:48 +00:00
( inRepo $ Git.Branch.update "merge" Git.Ref.headRef branch -- fast forward
, do
msg <- liftIO $
catchDefaultIO ("merge " ++ fromRef branch) $
readFile merge_msg
void $ inRepo $ Git.Branch.commit commitmode False msg
Git.Ref.headRef [Git.Ref.headRef, branch]
)
liftIO $ mapM_ nukeFile [merge_head, merge_msg, merge_mode]
where
canff = maybe (return False) (\o -> inRepo $ Git.Branch.fastForwardable o branch) old
mergeDirectCleanup :: FilePath -> Git.Ref -> Annex ()
2015-10-15 18:27:14 +00:00
mergeDirectCleanup d oldref = updateWorkTree d oldref False
{- Updates the direct mode work tree to reflect the changes staged in the
- index by a git command, that was run in a temporary work tree.
-
- Uses diff-index to compare the staged changes with provided ref
- which should be the tree before the merge, and applies those
- changes to the work tree.
-
- There are really only two types of changes: An old item can be deleted,
- or a new item added. Two passes are made, first deleting and then
- adding. This is to handle cases where eg, a file is deleted and a
2013-12-12 19:16:44 +00:00
- directory is added. (The diff-tree output may list these in the opposite
- order, but we cannot add the directory until the file with the
- same name is removed.)
-}
updateWorkTree :: FilePath -> Git.Ref -> Bool -> Annex ()
updateWorkTree d oldref force = do
(items, cleanup) <- inRepo $ DiffTree.diffIndex oldref
makeabs <- flip fromTopFilePath <$> gitRepo
let fsitems = zip (map (makeabs . DiffTree.file) items) items
forM_ fsitems $
go makeabs DiffTree.srcsha moveout moveout_raw
forM_ fsitems $
go makeabs DiffTree.dstsha movein movein_raw
2013-04-03 07:52:41 +00:00
void $ liftIO cleanup
where
go makeabs getsha a araw (f, item)
| getsha item == nullSha = noop
| otherwise = void $
tryNonAsync . maybe (araw item makeabs f) (\k -> void $ a item makeabs k f)
=<< catKey (getsha item)
moveout _ _ = removeDirect
{- Files deleted by the merge are removed from the work tree.
- Empty work tree directories are removed, per git behavior. -}
moveout_raw _ _ f = liftIO $ do
nukeFile f
void $ tryIO $ removeDirectory $ parentDir f
{- If the file is already present, with the right content for the
- key, it's left alone.
-
- If the file is already present, and does not exist in the
- oldref, preserve this local file.
-
- Otherwise, create the symlink and then if possible, replace it
- with the content. -}
movein item makeabs k f = unlessM (goodContent k f) $ do
unless force $ preserveUnannexed item makeabs f oldref
l <- calcRepo $ gitAnnexLink f k
replaceFile f $ makeAnnexLink l
toDirect k f
{- Any new, modified, or renamed files were written to the temp
- directory by the merge, and are moved to the real work tree. -}
movein_raw item makeabs f = do
unless force $ preserveUnannexed item makeabs f oldref
liftIO $ do
createDirectoryIfMissing True $ parentDir f
void $ tryIO $ rename (d </> getTopFilePath (DiffTree.file item)) f
{- If the file that's being moved in is already present in the work
- tree, but did not exist in the oldref, preserve this
- local, unannexed file (or directory), as "variant-local".
-
- It's also possible that the file that's being moved in
- is in a directory that collides with an exsting, non-annexed
- file (not a directory), which should be preserved.
-}
preserveUnannexed :: DiffTree.DiffTreeItem -> (TopFilePath -> FilePath) -> FilePath -> Ref -> Annex ()
preserveUnannexed item makeabs absf oldref = do
whenM (liftIO (collidingitem absf) <&&> unannexed absf) $
liftIO $ findnewname absf 0
checkdirs (DiffTree.file item)
where
checkdirs from = case upFrom (getTopFilePath from) of
Nothing -> noop
Just p -> do
let d = asTopFilePath p
let absd = makeabs d
whenM (liftIO (colliding_nondir absd) <&&> unannexed absd) $
liftIO $ findnewname absd 0
checkdirs d
collidingitem f = isJust
<$> catchMaybeIO (getSymbolicLinkStatus f)
colliding_nondir f = maybe False (not . isDirectory)
<$> catchMaybeIO (getSymbolicLinkStatus f)
unannexed f = (isNothing <$> isAnnexLink f)
<&&> (isNothing <$> catFileDetails oldref f)
findnewname :: FilePath -> Int -> IO ()
findnewname f n = do
let localf = mkVariant f
("local" ++ if n > 0 then show n else "")
ifM (collidingitem localf)
( findnewname f (n+1)
, rename f localf
`catchIO` const (findnewname f (n+1))
)
{- If possible, converts a symlink in the working tree into a direct
- mode file. If the content is not available, leaves the symlink
- unchanged. -}
toDirect :: Key -> FilePath -> Annex ()
2013-02-18 06:39:40 +00:00
toDirect k f = fromMaybe noop =<< toDirectGen k f
toDirectGen :: Key -> FilePath -> Annex (Maybe (Annex ()))
toDirectGen k f = do
loc <- calcRepo $ gitAnnexLocation k
ifM (liftIO $ doesFileExist loc)
( return $ Just $ fromindirect loc
, do
{- Copy content from another direct file. -}
absf <- liftIO $ absPath f
dlocs <- filterM (goodContent k) =<<
filterM (\l -> isNothing <$> getAnnexLinkTarget l) =<<
(filter (/= absf) <$> addAssociatedFile k f)
case dlocs of
[] -> return Nothing
(dloc:_) -> return $ Just $ fromdirect dloc
)
where
fromindirect loc = do
{- Move content from annex to direct file. -}
updateInodeCache k loc
void $ addAssociatedFile k f
modifyContent loc $ do
thawContent loc
liftIO (replaceFileFrom loc f)
`catchIO` (\_ -> freezeContent loc)
fromdirect loc = do
replaceFile f $
liftIO . void . copyFileExternal CopyAllMetaData loc
updateInodeCache k f
{- Removes a direct mode file, while retaining its content in the annex
- (unless its content has already been changed). -}
removeDirect :: Key -> FilePath -> Annex ()
removeDirect k f = do
void $ removeAssociatedFileUnchecked k f
unlessM (inAnnex k) $
annex.securehashesonly Cryptographically secure hashes can be forced to be used in a repository, by setting annex.securehashesonly. This does not prevent the git repository from containing files with insecure hashes, but it does prevent the content of such files from being pulled into .git/annex/objects from another repository. We want to make sure that at no point does git-annex accept content into .git/annex/objects that is hashed with an insecure key. Here's how it was done: * .git/annex/objects/xx/yy/KEY/ is kept frozen, so nothing can be written to it normally * So every place that writes content must call, thawContent or modifyContent. We can audit for these, and be sure we've considered all cases. * The main functions are moveAnnex, and linkToAnnex; these were made to check annex.securehashesonly, and are the main security boundary for annex.securehashesonly. * Most other calls to modifyContent deal with other files in the KEY directory (inode cache etc). The other ones that mess with the content are: - Annex.Direct.toDirectGen, in which content already in the annex directory is moved to the direct mode file, so not relevant. - fix and lock, which don't add new content - Command.ReKey.linkKey, which manually unlocks it to make a copy. * All other calls to thawContent appear safe. Made moveAnnex return a Bool, so checked all callsites and made them deal with a failure in appropriate ways. linkToAnnex simply returns LinkAnnexFailed; all callsites already deal with it failing in appropriate ways. This commit was sponsored by Riku Voipio.
2017-02-27 17:01:32 +00:00
-- If moveAnnex rejects the content of the key,
-- treat that the same as its content having changed.
whenM (goodContent k f <&&> moveAnnex k f) $
logStatus k InfoMissing
liftIO $ do
nukeFile f
void $ tryIO $ removeDirectory $ parentDir f
{- Called when a direct mode file has been changed. Its old content may be
- lost. -}
changedDirect :: Key -> FilePath -> Annex ()
changedDirect oldk f = do
locs <- removeAssociatedFile oldk f
whenM (pure (null locs) <&&> not <$> inAnnex oldk) $
logStatus oldk InfoMissing
2015-12-15 19:34:28 +00:00
{- Git config settings to enable/disable direct mode. -}
setDirect :: Bool -> Annex ()
setDirect wantdirect = do
if wantdirect
then do
switchHEAD
setbare
else do
setbare
switchHEADBack
setConfig (annexConfig "direct") val
Annex.changeGitConfig $ \c -> c { annexDirect = wantdirect }
where
val = Git.Config.boolConfig wantdirect
coreworktree = ConfigKey "core.worktree"
indirectworktree = ConfigKey "core.indirect-worktree"
setbare = do
-- core.worktree is not compatable with
-- core.bare; git does not allow both to be set, so
-- unset it when enabling direct mode, caching in
-- core.indirect-worktree
if wantdirect
then moveconfig coreworktree indirectworktree
else moveconfig indirectworktree coreworktree
setConfig (ConfigKey Git.Config.coreBare) val
moveconfig src dest = do
v <- getConfigMaybe src
case v of
Nothing -> noop
Just wt -> do
unsetConfig src
setConfig dest wt
reloadConfig
{- Since direct mode sets core.bare=true, incoming pushes could change
- the currently checked out branch. To avoid this problem, HEAD
- is changed to a internal ref that nothing is going to push to.
-
- For refs/heads/master, use refs/heads/annex/direct/master;
- this way things that show HEAD (eg shell prompts) will
- hopefully show just "master". -}
directBranch :: Ref -> Ref
directBranch orighead = case splitc '/' $ fromRef orighead of
("refs":"heads":"annex":"direct":_) -> orighead
("refs":"heads":rest) ->
Ref $ "refs/heads/annex/direct/" ++ intercalate "/" rest
_ -> Ref $ "refs/heads/" ++ fromRef (Git.Ref.base orighead)
{- Converts a directBranch back to the original branch.
-
- Any other ref is left unchanged.
-}
fromDirectBranch :: Ref -> Ref
fromDirectBranch directhead = case splitc '/' $ fromRef directhead of
("refs":"heads":"annex":"direct":rest) ->
Ref $ "refs/heads/" ++ intercalate "/" rest
_ -> directhead
switchHEAD :: Annex ()
switchHEAD = maybe noop switch =<< inRepo Git.Branch.currentUnsafe
where
switch orighead = do
let newhead = directBranch orighead
2016-03-31 16:27:48 +00:00
maybe noop (inRepo . Git.Branch.update "entering direct mode" newhead)
=<< inRepo (Git.Ref.sha orighead)
inRepo $ Git.Branch.checkout newhead
switchHEADBack :: Annex ()
switchHEADBack = maybe noop switch =<< inRepo Git.Branch.currentUnsafe
where
switch currhead = do
let orighead = fromDirectBranch currhead
v <- inRepo $ Git.Ref.sha currhead
case v of
Just headsha
| orighead /= currhead -> do
2016-03-31 16:27:48 +00:00
inRepo $ Git.Branch.update "leaving direct mode" orighead headsha
inRepo $ Git.Branch.checkout orighead
inRepo $ Git.Branch.delete currhead
_ -> inRepo $ Git.Branch.checkout orighead