git-annex/Annex/Branch.hs

492 lines
16 KiB
Haskell
Raw Normal View History

2011-06-21 20:08:09 +00:00
{- management of the git-annex branch
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
2011-10-04 04:40:47 +00:00
module Annex.Branch (
create,
2011-06-21 20:08:09 +00:00
update,
disableUpdate,
get,
change,
2011-06-22 21:47:06 +00:00
commit,
files,
refExists,
hasOrigin,
hasSomeBranch,
name
2011-06-21 20:08:09 +00:00
) where
import System.IO.Binary
import System.Exit
import qualified Data.ByteString.Lazy.Char8 as L
2011-10-05 20:02:51 +00:00
import Common.Annex
import Annex.Exception
import Types.BranchState
import qualified Git
import qualified Git.UnionMerge
import qualified Annex
2011-10-04 04:40:47 +00:00
import Annex.CatFile
{- Name of the branch that is used to store git-annex's information. -}
name :: Git.Ref
name = Git.Ref "git-annex"
{- Fully qualified name of the branch. -}
fullname :: Git.Ref
fullname = Git.Ref $ "refs/heads/" ++ show name
{- Branch's name in origin. -}
originname :: Git.Ref
originname = Git.Ref $ "origin/" ++ show name
{- Populates the branch's index file with the current branch contents.
-
- This is only done when the index doesn't yet exist, and the index
- is used to build up changes to be commited to the branch, and merge
- in changes from other branches.
-}
genIndex :: Git.Repo -> IO ()
2011-11-16 05:46:46 +00:00
genIndex g = Git.UnionMerge.stream_update_index g
[Git.UnionMerge.ls_tree fullname g]
{- Merges the specified branches into the index.
- Any changes staged in the index will be preserved. -}
mergeIndex :: [Git.Ref] -> Annex ()
mergeIndex branches = do
h <- catFileHandle
inRepo $ \g -> Git.UnionMerge.merge_index h g branches
detect and recover from branch push/commit race Dealing with a race without using locking is exceedingly difficult and tricky. Fully tested, I hope. There are three places left where the branch can be updated, that are not covered by the race recovery code. Let's prove they're all immune to the race: 1. tryFastForwardTo checks to see if a fast-forward can be done, and then does git-update-ref on the branch to fast-forward it. If a push comes in before the check, then either no fast-forward will be done (ok), or the push set the branch to a ref that can still be fast-forwarded (also ok) If a push comes in after the check, the git-update-ref will undo the ref change made by the push. It's as if the push did not come in, and the next git-push will see this, and try to re-do it. (acceptable) 2. When creating the branch for the very first time, an empty index is created, and a commit of it made to the branch. The commit's ref is recorded as the current state of the index. If a push came in during that, it will be noticed the next time a commit is made to the branch, since the branch will have changed. (ok) 3. Creating the branch from an existing remote branch involves making the branch, and then getting its ref, and recording that the index reflects that ref. If a push creates the branch first, git-branch will fail (ok). If the branch is created and a racing push is then able to change it (highly unlikely!) we're still ok, because it first records the ref into the index.lck, and then updating the index. The race can cause the index.lck to have the old branch ref, while the index has the newly pushed branch merged into it, but that only results in an unnecessary update of the index file later on.
2011-12-11 22:39:53 +00:00
{- Runs an action using the branch's index file. -}
withIndex :: Annex a -> Annex a
withIndex = withIndex' False
withIndex' :: Bool -> Annex a -> Annex a
withIndex' bootstrapping a = do
f <- fromRepo gitAnnexIndex
bracketIO (Git.useIndex f) id $ do
unlessM (liftIO $ doesFileExist f) $ do
unless bootstrapping create
liftIO $ createDirectoryIfMissing True $ takeDirectory f
unless bootstrapping $ inRepo genIndex
a
{- Updates the branch's index to reflect the current contents of the branch.
- Any changes staged in the index will be preserved.
-
- Compares the ref stored in the lock file with the current
- ref of the branch to see if an update is needed.
-}
detect and recover from branch push/commit race Dealing with a race without using locking is exceedingly difficult and tricky. Fully tested, I hope. There are three places left where the branch can be updated, that are not covered by the race recovery code. Let's prove they're all immune to the race: 1. tryFastForwardTo checks to see if a fast-forward can be done, and then does git-update-ref on the branch to fast-forward it. If a push comes in before the check, then either no fast-forward will be done (ok), or the push set the branch to a ref that can still be fast-forwarded (also ok) If a push comes in after the check, the git-update-ref will undo the ref change made by the push. It's as if the push did not come in, and the next git-push will see this, and try to re-do it. (acceptable) 2. When creating the branch for the very first time, an empty index is created, and a commit of it made to the branch. The commit's ref is recorded as the current state of the index. If a push came in during that, it will be noticed the next time a commit is made to the branch, since the branch will have changed. (ok) 3. Creating the branch from an existing remote branch involves making the branch, and then getting its ref, and recording that the index reflects that ref. If a push creates the branch first, git-branch will fail (ok). If the branch is created and a racing push is then able to change it (highly unlikely!) we're still ok, because it first records the ref into the index.lck, and then updating the index. The race can cause the index.lck to have the old branch ref, while the index has the newly pushed branch merged into it, but that only results in an unnecessary update of the index file later on.
2011-12-11 22:39:53 +00:00
updateIndex :: Annex (Maybe Git.Ref)
updateIndex = do
branchref <- getRef fullname
detect and recover from branch push/commit race Dealing with a race without using locking is exceedingly difficult and tricky. Fully tested, I hope. There are three places left where the branch can be updated, that are not covered by the race recovery code. Let's prove they're all immune to the race: 1. tryFastForwardTo checks to see if a fast-forward can be done, and then does git-update-ref on the branch to fast-forward it. If a push comes in before the check, then either no fast-forward will be done (ok), or the push set the branch to a ref that can still be fast-forwarded (also ok) If a push comes in after the check, the git-update-ref will undo the ref change made by the push. It's as if the push did not come in, and the next git-push will see this, and try to re-do it. (acceptable) 2. When creating the branch for the very first time, an empty index is created, and a commit of it made to the branch. The commit's ref is recorded as the current state of the index. If a push came in during that, it will be noticed the next time a commit is made to the branch, since the branch will have changed. (ok) 3. Creating the branch from an existing remote branch involves making the branch, and then getting its ref, and recording that the index reflects that ref. If a push creates the branch first, git-branch will fail (ok). If the branch is created and a racing push is then able to change it (highly unlikely!) we're still ok, because it first records the ref into the index.lck, and then updating the index. The race can cause the index.lck to have the old branch ref, while the index has the newly pushed branch merged into it, but that only results in an unnecessary update of the index file later on.
2011-12-11 22:39:53 +00:00
go branchref
return branchref
where
go Nothing = return ()
go (Just branchref) = do
lock <- fromRepo gitAnnexIndexLock
lockref <- firstRef <$> liftIO (catchDefaultIO (readFileStrict lock) "")
when (lockref /= branchref) $ do
withIndex $ mergeIndex [fullname]
setIndexRef branchref
{- Record that the branch's index has been updated to correspond to a
- given ref of the branch. -}
setIndexRef :: Git.Ref -> Annex ()
setIndexRef ref = do
lock <- fromRepo gitAnnexIndexLock
liftIO $ writeFile lock $ show ref ++ "\n"
detect and recover from branch push/commit race Dealing with a race without using locking is exceedingly difficult and tricky. Fully tested, I hope. There are three places left where the branch can be updated, that are not covered by the race recovery code. Let's prove they're all immune to the race: 1. tryFastForwardTo checks to see if a fast-forward can be done, and then does git-update-ref on the branch to fast-forward it. If a push comes in before the check, then either no fast-forward will be done (ok), or the push set the branch to a ref that can still be fast-forwarded (also ok) If a push comes in after the check, the git-update-ref will undo the ref change made by the push. It's as if the push did not come in, and the next git-push will see this, and try to re-do it. (acceptable) 2. When creating the branch for the very first time, an empty index is created, and a commit of it made to the branch. The commit's ref is recorded as the current state of the index. If a push came in during that, it will be noticed the next time a commit is made to the branch, since the branch will have changed. (ok) 3. Creating the branch from an existing remote branch involves making the branch, and then getting its ref, and recording that the index reflects that ref. If a push creates the branch first, git-branch will fail (ok). If the branch is created and a racing push is then able to change it (highly unlikely!) we're still ok, because it first records the ref into the index.lck, and then updating the index. The race can cause the index.lck to have the old branch ref, while the index has the newly pushed branch merged into it, but that only results in an unnecessary update of the index file later on.
2011-12-11 22:39:53 +00:00
{- Commits the staged changes in the index to the branch.
-
- Ensures that the branch's index file is first updated to include the
- current state of the branch, before running the commit action. This
- is needed because the branch may have had changes pushed to it, that
- are not yet reflected in the index.
-
- Also safely handles a race that can occur if a change is being pushed
- into the branch at the same time. When the race happens, the commit will
- be made on top of the newly pushed change, but without the index file
- being updated to include it. The result is that the newly pushed
- change is reverted. This race is detected and another commit made
- to fix it.
-}
commitBranch :: String -> [Git.Ref] -> Annex ()
commitBranch message parents = do
expected <- updateIndex
committedref <- inRepo $ Git.commit message fullname parents
setIndexRef committedref
parentrefs <- commitparents <$> catObject committedref
when (racedetected expected parentrefs) $
fixrace committedref parentrefs
where
-- look for "parent ref" lines and return the refs
commitparents = map (Git.Ref . snd) . filter isparent .
map (toassoc . L.unpack) . L.lines
toassoc = separate (== ' ')
isparent (k,_) = k == "parent"
{- The race can be detected by checking the commit's
- parent, which will be the newly pushed branch,
- instead of the expected ref that the index was updated to. -}
racedetected Nothing parentrefs
| null parentrefs = False -- first commit, no parents
| otherwise = True -- race on first commit
racedetected (Just expectedref) parentrefs
| expectedref `elem` parentrefs = False -- good parent
| otherwise = True -- race!
{- To recover from the race, union merge the lost refs
- into the index, and recommit on top of the bad commit. -}
fixrace committedref lostrefs = do
mergeIndex lostrefs
commitBranch racemessage [committedref]
racemessage = message ++ " (recovery from race)"
{- Runs an action using the branch's index file, first making sure that
- the branch and index are up-to-date. -}
withIndexUpdate :: Annex a -> Annex a
withIndexUpdate a = update >> withIndex a
getState :: Annex BranchState
getState = Annex.getState Annex.branchstate
setState :: BranchState -> Annex ()
setState state = Annex.changeState $ \s -> s { Annex.branchstate = state }
setCache :: FilePath -> String -> Annex ()
setCache file content = do
state <- getState
setState state { cachedFile = Just file, cachedContent = content }
invalidateCache :: Annex ()
invalidateCache = do
state <- getState
setState state { cachedFile = Nothing, cachedContent = "" }
getCache :: FilePath -> Annex (Maybe String)
getCache file = getState >>= go
where
go state
| cachedFile state == Just file =
return $ Just $ cachedContent state
| otherwise = return Nothing
{- Creates the branch, if it does not already exist. -}
create :: Annex ()
detect and recover from branch push/commit race Dealing with a race without using locking is exceedingly difficult and tricky. Fully tested, I hope. There are three places left where the branch can be updated, that are not covered by the race recovery code. Let's prove they're all immune to the race: 1. tryFastForwardTo checks to see if a fast-forward can be done, and then does git-update-ref on the branch to fast-forward it. If a push comes in before the check, then either no fast-forward will be done (ok), or the push set the branch to a ref that can still be fast-forwarded (also ok) If a push comes in after the check, the git-update-ref will undo the ref change made by the push. It's as if the push did not come in, and the next git-push will see this, and try to re-do it. (acceptable) 2. When creating the branch for the very first time, an empty index is created, and a commit of it made to the branch. The commit's ref is recorded as the current state of the index. If a push came in during that, it will be noticed the next time a commit is made to the branch, since the branch will have changed. (ok) 3. Creating the branch from an existing remote branch involves making the branch, and then getting its ref, and recording that the index reflects that ref. If a push creates the branch first, git-branch will fail (ok). If the branch is created and a racing push is then able to change it (highly unlikely!) we're still ok, because it first records the ref into the index.lck, and then updating the index. The race can cause the index.lck to have the old branch ref, while the index has the newly pushed branch merged into it, but that only results in an unnecessary update of the index file later on.
2011-12-11 22:39:53 +00:00
create = unlessM hasBranch $ hasOrigin >>= go
where
go True = do
inRepo $ Git.run "branch"
[Param $ show name, Param $ show originname]
detect and recover from branch push/commit race Dealing with a race without using locking is exceedingly difficult and tricky. Fully tested, I hope. There are three places left where the branch can be updated, that are not covered by the race recovery code. Let's prove they're all immune to the race: 1. tryFastForwardTo checks to see if a fast-forward can be done, and then does git-update-ref on the branch to fast-forward it. If a push comes in before the check, then either no fast-forward will be done (ok), or the push set the branch to a ref that can still be fast-forwarded (also ok) If a push comes in after the check, the git-update-ref will undo the ref change made by the push. It's as if the push did not come in, and the next git-push will see this, and try to re-do it. (acceptable) 2. When creating the branch for the very first time, an empty index is created, and a commit of it made to the branch. The commit's ref is recorded as the current state of the index. If a push came in during that, it will be noticed the next time a commit is made to the branch, since the branch will have changed. (ok) 3. Creating the branch from an existing remote branch involves making the branch, and then getting its ref, and recording that the index reflects that ref. If a push creates the branch first, git-branch will fail (ok). If the branch is created and a racing push is then able to change it (highly unlikely!) we're still ok, because it first records the ref into the index.lck, and then updating the index. The race can cause the index.lck to have the old branch ref, while the index has the newly pushed branch merged into it, but that only results in an unnecessary update of the index file later on.
2011-12-11 22:39:53 +00:00
maybe (return ()) setIndexRef =<< getRef fullname
go False = withIndex' True $
setIndexRef =<< (inRepo $ Git.commit "branch created" fullname [])
{- Stages the journal, and commits staged changes to the branch. -}
commit :: String -> Annex ()
commit message = whenM journalDirty $ lockJournal $ do
stageJournalFiles
detect and recover from branch push/commit race Dealing with a race without using locking is exceedingly difficult and tricky. Fully tested, I hope. There are three places left where the branch can be updated, that are not covered by the race recovery code. Let's prove they're all immune to the race: 1. tryFastForwardTo checks to see if a fast-forward can be done, and then does git-update-ref on the branch to fast-forward it. If a push comes in before the check, then either no fast-forward will be done (ok), or the push set the branch to a ref that can still be fast-forwarded (also ok) If a push comes in after the check, the git-update-ref will undo the ref change made by the push. It's as if the push did not come in, and the next git-push will see this, and try to re-do it. (acceptable) 2. When creating the branch for the very first time, an empty index is created, and a commit of it made to the branch. The commit's ref is recorded as the current state of the index. If a push came in during that, it will be noticed the next time a commit is made to the branch, since the branch will have changed. (ok) 3. Creating the branch from an existing remote branch involves making the branch, and then getting its ref, and recording that the index reflects that ref. If a push creates the branch first, git-branch will fail (ok). If the branch is created and a racing push is then able to change it (highly unlikely!) we're still ok, because it first records the ref into the index.lck, and then updating the index. The race can cause the index.lck to have the old branch ref, while the index has the newly pushed branch merged into it, but that only results in an unnecessary update of the index file later on.
2011-12-11 22:39:53 +00:00
withIndex $ commitBranch message [fullname]
{- Ensures that the branch and index are is up-to-date; should be
- called before data is read from it. Runs only once per git-annex run.
-
merge: Use fast-forward merges when possible. Thanks Valentin Haenel for a test case showing how non-fast-forward merges could result in an ongoing pull/merge/push cycle. While the git-annex branch is fast-forwarded, git-annex's index file is still updated using the union merge strategy as before. There's no other way to update the index that would be any faster. It is possible that a union merge and a fast-forward result in different file contents: Files should have the same lines, but a union merge may change their order. If this happens, the next commit made to the git-annex branch will have some unnecessary changes to line orders, but the consistency of data should be preserved. Note that when the journal contains changes, a fast-forward is never attempted, which is fine, because committing those changes would be vanishingly unlikely to leave the git-annex branch at a commit that already exists in one of the remotes. The real difficulty is handling the case where multiple remotes have all changed. git-annex does find the best (ie, newest) one and fast forwards to it. If the remotes are diverged, no fast-forward is done at all. It would be possible to pick one, fast forward to it, and make a merge commit to the rest, I see no benefit to adding that complexity. Determining the best of N changed remotes requires N*2+1 calls to git-log, but these are fast git-log calls, and N is typically small. Also, typically some or all of the remote refs will be the same, and git-log is not called to compare those. In the real world I expect this will almost always add only 1 git-log call to the merge process. (Which already makes N anyway.)
2011-11-06 19:18:45 +00:00
- Before refs are merged into the index, it's important to first stage the
- journal into the index. Otherwise, any changes in the journal would
- later get staged, and might overwrite changes made during the merge.
-
- (It would be cleaner to handle the merge by updating the journal, not the
- index, with changes from the branches.)
merge: Use fast-forward merges when possible. Thanks Valentin Haenel for a test case showing how non-fast-forward merges could result in an ongoing pull/merge/push cycle. While the git-annex branch is fast-forwarded, git-annex's index file is still updated using the union merge strategy as before. There's no other way to update the index that would be any faster. It is possible that a union merge and a fast-forward result in different file contents: Files should have the same lines, but a union merge may change their order. If this happens, the next commit made to the git-annex branch will have some unnecessary changes to line orders, but the consistency of data should be preserved. Note that when the journal contains changes, a fast-forward is never attempted, which is fine, because committing those changes would be vanishingly unlikely to leave the git-annex branch at a commit that already exists in one of the remotes. The real difficulty is handling the case where multiple remotes have all changed. git-annex does find the best (ie, newest) one and fast forwards to it. If the remotes are diverged, no fast-forward is done at all. It would be possible to pick one, fast forward to it, and make a merge commit to the rest, I see no benefit to adding that complexity. Determining the best of N changed remotes requires N*2+1 calls to git-log, but these are fast git-log calls, and N is typically small. Also, typically some or all of the remote refs will be the same, and git-log is not called to compare those. In the real world I expect this will almost always add only 1 git-log call to the merge process. (Which already makes N anyway.)
2011-11-06 19:18:45 +00:00
-
- The index is always updated using a union merge, as that's the most
- efficient way to update it. However, if the branch can be
- fast-forwarded, that is then done, rather than adding an unnecessary
- commit to it.
-}
update :: Annex ()
update = onceonly $ do
-- ensure branch exists, and index is up-to-date
create
detect and recover from branch push/commit race Dealing with a race without using locking is exceedingly difficult and tricky. Fully tested, I hope. There are three places left where the branch can be updated, that are not covered by the race recovery code. Let's prove they're all immune to the race: 1. tryFastForwardTo checks to see if a fast-forward can be done, and then does git-update-ref on the branch to fast-forward it. If a push comes in before the check, then either no fast-forward will be done (ok), or the push set the branch to a ref that can still be fast-forwarded (also ok) If a push comes in after the check, the git-update-ref will undo the ref change made by the push. It's as if the push did not come in, and the next git-push will see this, and try to re-do it. (acceptable) 2. When creating the branch for the very first time, an empty index is created, and a commit of it made to the branch. The commit's ref is recorded as the current state of the index. If a push came in during that, it will be noticed the next time a commit is made to the branch, since the branch will have changed. (ok) 3. Creating the branch from an existing remote branch involves making the branch, and then getting its ref, and recording that the index reflects that ref. If a push creates the branch first, git-branch will fail (ok). If the branch is created and a racing push is then able to change it (highly unlikely!) we're still ok, because it first records the ref into the index.lck, and then updating the index. The race can cause the index.lck to have the old branch ref, while the index has the newly pushed branch merged into it, but that only results in an unnecessary update of the index file later on.
2011-12-11 22:39:53 +00:00
_ <- updateIndex
-- check what needs updating before taking the lock
dirty <- journalDirty
merge: Use fast-forward merges when possible. Thanks Valentin Haenel for a test case showing how non-fast-forward merges could result in an ongoing pull/merge/push cycle. While the git-annex branch is fast-forwarded, git-annex's index file is still updated using the union merge strategy as before. There's no other way to update the index that would be any faster. It is possible that a union merge and a fast-forward result in different file contents: Files should have the same lines, but a union merge may change their order. If this happens, the next commit made to the git-annex branch will have some unnecessary changes to line orders, but the consistency of data should be preserved. Note that when the journal contains changes, a fast-forward is never attempted, which is fine, because committing those changes would be vanishingly unlikely to leave the git-annex branch at a commit that already exists in one of the remotes. The real difficulty is handling the case where multiple remotes have all changed. git-annex does find the best (ie, newest) one and fast forwards to it. If the remotes are diverged, no fast-forward is done at all. It would be possible to pick one, fast forward to it, and make a merge commit to the rest, I see no benefit to adding that complexity. Determining the best of N changed remotes requires N*2+1 calls to git-log, but these are fast git-log calls, and N is typically small. Also, typically some or all of the remote refs will be the same, and git-log is not called to compare those. In the real world I expect this will almost always add only 1 git-log call to the merge process. (Which already makes N anyway.)
2011-11-06 19:18:45 +00:00
c <- filterM (changedBranch name . snd) =<< siblingBranches
let (refs, branches) = unzip c
unless (not dirty && null refs) $ withIndex $ lockJournal $ do
when dirty stageJournalFiles
let merge_desc = if null branches
then "update"
else "merging " ++
2011-12-10 16:30:31 +00:00
unwords (map Git.refDescribe branches) ++
" into " ++ show name
unless (null branches) $ do
showSideAction merge_desc
mergeIndex branches
merge: Use fast-forward merges when possible. Thanks Valentin Haenel for a test case showing how non-fast-forward merges could result in an ongoing pull/merge/push cycle. While the git-annex branch is fast-forwarded, git-annex's index file is still updated using the union merge strategy as before. There's no other way to update the index that would be any faster. It is possible that a union merge and a fast-forward result in different file contents: Files should have the same lines, but a union merge may change their order. If this happens, the next commit made to the git-annex branch will have some unnecessary changes to line orders, but the consistency of data should be preserved. Note that when the journal contains changes, a fast-forward is never attempted, which is fine, because committing those changes would be vanishingly unlikely to leave the git-annex branch at a commit that already exists in one of the remotes. The real difficulty is handling the case where multiple remotes have all changed. git-annex does find the best (ie, newest) one and fast forwards to it. If the remotes are diverged, no fast-forward is done at all. It would be possible to pick one, fast forward to it, and make a merge commit to the rest, I see no benefit to adding that complexity. Determining the best of N changed remotes requires N*2+1 calls to git-log, but these are fast git-log calls, and N is typically small. Also, typically some or all of the remote refs will be the same, and git-log is not called to compare those. In the real world I expect this will almost always add only 1 git-log call to the merge process. (Which already makes N anyway.)
2011-11-06 19:18:45 +00:00
ff <- if dirty then return False else tryFastForwardTo refs
detect and recover from branch push/commit race Dealing with a race without using locking is exceedingly difficult and tricky. Fully tested, I hope. There are three places left where the branch can be updated, that are not covered by the race recovery code. Let's prove they're all immune to the race: 1. tryFastForwardTo checks to see if a fast-forward can be done, and then does git-update-ref on the branch to fast-forward it. If a push comes in before the check, then either no fast-forward will be done (ok), or the push set the branch to a ref that can still be fast-forwarded (also ok) If a push comes in after the check, the git-update-ref will undo the ref change made by the push. It's as if the push did not come in, and the next git-push will see this, and try to re-do it. (acceptable) 2. When creating the branch for the very first time, an empty index is created, and a commit of it made to the branch. The commit's ref is recorded as the current state of the index. If a push came in during that, it will be noticed the next time a commit is made to the branch, since the branch will have changed. (ok) 3. Creating the branch from an existing remote branch involves making the branch, and then getting its ref, and recording that the index reflects that ref. If a push creates the branch first, git-branch will fail (ok). If the branch is created and a racing push is then able to change it (highly unlikely!) we're still ok, because it first records the ref into the index.lck, and then updating the index. The race can cause the index.lck to have the old branch ref, while the index has the newly pushed branch merged into it, but that only results in an unnecessary update of the index file later on.
2011-12-11 22:39:53 +00:00
unless ff $ commitBranch merge_desc (nub $ fullname:refs)
invalidateCache
where
onceonly a = unlessM (branchUpdated <$> getState) $ do
r <- a
disableUpdate
return r
merge: Use fast-forward merges when possible. Thanks Valentin Haenel for a test case showing how non-fast-forward merges could result in an ongoing pull/merge/push cycle. While the git-annex branch is fast-forwarded, git-annex's index file is still updated using the union merge strategy as before. There's no other way to update the index that would be any faster. It is possible that a union merge and a fast-forward result in different file contents: Files should have the same lines, but a union merge may change their order. If this happens, the next commit made to the git-annex branch will have some unnecessary changes to line orders, but the consistency of data should be preserved. Note that when the journal contains changes, a fast-forward is never attempted, which is fine, because committing those changes would be vanishingly unlikely to leave the git-annex branch at a commit that already exists in one of the remotes. The real difficulty is handling the case where multiple remotes have all changed. git-annex does find the best (ie, newest) one and fast forwards to it. If the remotes are diverged, no fast-forward is done at all. It would be possible to pick one, fast forward to it, and make a merge commit to the rest, I see no benefit to adding that complexity. Determining the best of N changed remotes requires N*2+1 calls to git-log, but these are fast git-log calls, and N is typically small. Also, typically some or all of the remote refs will be the same, and git-log is not called to compare those. In the real world I expect this will almost always add only 1 git-log call to the merge process. (Which already makes N anyway.)
2011-11-06 19:18:45 +00:00
{- Checks if the second branch has any commits not present on the first
- branch. -}
changedBranch :: Git.Branch -> Git.Branch -> Annex Bool
changedBranch origbranch newbranch = not . L.null <$> diffs
where
diffs = inRepo $ Git.pipeRead
[ Param "log"
, Param (show origbranch ++ ".." ++ show newbranch)
, Params "--oneline -n1"
]
merge: Use fast-forward merges when possible. Thanks Valentin Haenel for a test case showing how non-fast-forward merges could result in an ongoing pull/merge/push cycle. While the git-annex branch is fast-forwarded, git-annex's index file is still updated using the union merge strategy as before. There's no other way to update the index that would be any faster. It is possible that a union merge and a fast-forward result in different file contents: Files should have the same lines, but a union merge may change their order. If this happens, the next commit made to the git-annex branch will have some unnecessary changes to line orders, but the consistency of data should be preserved. Note that when the journal contains changes, a fast-forward is never attempted, which is fine, because committing those changes would be vanishingly unlikely to leave the git-annex branch at a commit that already exists in one of the remotes. The real difficulty is handling the case where multiple remotes have all changed. git-annex does find the best (ie, newest) one and fast forwards to it. If the remotes are diverged, no fast-forward is done at all. It would be possible to pick one, fast forward to it, and make a merge commit to the rest, I see no benefit to adding that complexity. Determining the best of N changed remotes requires N*2+1 calls to git-log, but these are fast git-log calls, and N is typically small. Also, typically some or all of the remote refs will be the same, and git-log is not called to compare those. In the real world I expect this will almost always add only 1 git-log call to the merge process. (Which already makes N anyway.)
2011-11-06 19:18:45 +00:00
{- Given a set of refs that are all known to have commits not
- on the git-annex branch, tries to update the branch by a
- fast-forward.
-
- In order for that to be possible, one of the refs must contain
- every commit present in all the other refs, as well as in the
- git-annex branch.
-}
tryFastForwardTo :: [Git.Ref] -> Annex Bool
merge: Use fast-forward merges when possible. Thanks Valentin Haenel for a test case showing how non-fast-forward merges could result in an ongoing pull/merge/push cycle. While the git-annex branch is fast-forwarded, git-annex's index file is still updated using the union merge strategy as before. There's no other way to update the index that would be any faster. It is possible that a union merge and a fast-forward result in different file contents: Files should have the same lines, but a union merge may change their order. If this happens, the next commit made to the git-annex branch will have some unnecessary changes to line orders, but the consistency of data should be preserved. Note that when the journal contains changes, a fast-forward is never attempted, which is fine, because committing those changes would be vanishingly unlikely to leave the git-annex branch at a commit that already exists in one of the remotes. The real difficulty is handling the case where multiple remotes have all changed. git-annex does find the best (ie, newest) one and fast forwards to it. If the remotes are diverged, no fast-forward is done at all. It would be possible to pick one, fast forward to it, and make a merge commit to the rest, I see no benefit to adding that complexity. Determining the best of N changed remotes requires N*2+1 calls to git-log, but these are fast git-log calls, and N is typically small. Also, typically some or all of the remote refs will be the same, and git-log is not called to compare those. In the real world I expect this will almost always add only 1 git-log call to the merge process. (Which already makes N anyway.)
2011-11-06 19:18:45 +00:00
tryFastForwardTo [] = return True
tryFastForwardTo (first:rest) = do
-- First, check that the git-annex branch does not contain any
2011-11-30 16:01:15 +00:00
-- new commits that are not in the first other branch. If it does,
merge: Use fast-forward merges when possible. Thanks Valentin Haenel for a test case showing how non-fast-forward merges could result in an ongoing pull/merge/push cycle. While the git-annex branch is fast-forwarded, git-annex's index file is still updated using the union merge strategy as before. There's no other way to update the index that would be any faster. It is possible that a union merge and a fast-forward result in different file contents: Files should have the same lines, but a union merge may change their order. If this happens, the next commit made to the git-annex branch will have some unnecessary changes to line orders, but the consistency of data should be preserved. Note that when the journal contains changes, a fast-forward is never attempted, which is fine, because committing those changes would be vanishingly unlikely to leave the git-annex branch at a commit that already exists in one of the remotes. The real difficulty is handling the case where multiple remotes have all changed. git-annex does find the best (ie, newest) one and fast forwards to it. If the remotes are diverged, no fast-forward is done at all. It would be possible to pick one, fast forward to it, and make a merge commit to the rest, I see no benefit to adding that complexity. Determining the best of N changed remotes requires N*2+1 calls to git-log, but these are fast git-log calls, and N is typically small. Also, typically some or all of the remote refs will be the same, and git-log is not called to compare those. In the real world I expect this will almost always add only 1 git-log call to the merge process. (Which already makes N anyway.)
2011-11-06 19:18:45 +00:00
-- cannot fast-forward.
diverged <- changedBranch first fullname
if diverged
then no_ff
else maybe no_ff do_ff =<< findbest first rest
where
no_ff = return False
do_ff branch = do
2011-11-30 16:01:15 +00:00
inRepo $ Git.run "update-ref"
[Param $ show fullname, Param $ show branch]
merge: Use fast-forward merges when possible. Thanks Valentin Haenel for a test case showing how non-fast-forward merges could result in an ongoing pull/merge/push cycle. While the git-annex branch is fast-forwarded, git-annex's index file is still updated using the union merge strategy as before. There's no other way to update the index that would be any faster. It is possible that a union merge and a fast-forward result in different file contents: Files should have the same lines, but a union merge may change their order. If this happens, the next commit made to the git-annex branch will have some unnecessary changes to line orders, but the consistency of data should be preserved. Note that when the journal contains changes, a fast-forward is never attempted, which is fine, because committing those changes would be vanishingly unlikely to leave the git-annex branch at a commit that already exists in one of the remotes. The real difficulty is handling the case where multiple remotes have all changed. git-annex does find the best (ie, newest) one and fast forwards to it. If the remotes are diverged, no fast-forward is done at all. It would be possible to pick one, fast forward to it, and make a merge commit to the rest, I see no benefit to adding that complexity. Determining the best of N changed remotes requires N*2+1 calls to git-log, but these are fast git-log calls, and N is typically small. Also, typically some or all of the remote refs will be the same, and git-log is not called to compare those. In the real world I expect this will almost always add only 1 git-log call to the merge process. (Which already makes N anyway.)
2011-11-06 19:18:45 +00:00
return True
findbest c [] = return $ Just c
findbest c (r:rs)
| c == r = findbest c rs
| otherwise = do
better <- changedBranch c r
worse <- changedBranch r c
case (better, worse) of
(True, True) -> return Nothing -- divergent fail
(True, False) -> findbest r rs -- better
(False, True) -> findbest c rs -- worse
(False, False) -> findbest c rs -- same
2011-10-27 22:01:46 +00:00
{- Avoids updating the branch. A useful optimisation when the branch
- is known to have not changed, or git-annex won't be relying on info
- from it. -}
disableUpdate :: Annex ()
disableUpdate = Annex.changeState setupdated
where
setupdated s = s { Annex.branchstate = new }
where
new = old { branchUpdated = True }
old = Annex.branchstate s
{- Checks if a git ref exists. -}
refExists :: Git.Ref -> Annex Bool
refExists ref = inRepo $ Git.runBool "show-ref"
[Param "--verify", Param "-q", Param $ show ref]
{- Does the main git-annex branch exist? -}
hasBranch :: Annex Bool
hasBranch = refExists fullname
{- Does origin/git-annex exist? -}
hasOrigin :: Annex Bool
hasOrigin = refExists originname
{- Does the git-annex branch or a foo/git-annex branch exist? -}
hasSomeBranch :: Annex Bool
hasSomeBranch = not . null <$> siblingBranches
{- List of git-annex (refs, branches), including the main one and any
- from remotes. Duplicate refs are filtered out. -}
siblingBranches :: Annex [(Git.Ref, Git.Branch)]
siblingBranches = do
r <- inRepo $ Git.pipeRead [Param "show-ref", Param $ show name]
return $ nubBy uref $ map (gen . words . L.unpack) (L.lines r)
where
gen l = (Git.Ref $ head l, Git.Ref $ last l)
uref (a, _) (b, _) = a == b
{- Get the ref of a branch. -}
detect and recover from branch push/commit race Dealing with a race without using locking is exceedingly difficult and tricky. Fully tested, I hope. There are three places left where the branch can be updated, that are not covered by the race recovery code. Let's prove they're all immune to the race: 1. tryFastForwardTo checks to see if a fast-forward can be done, and then does git-update-ref on the branch to fast-forward it. If a push comes in before the check, then either no fast-forward will be done (ok), or the push set the branch to a ref that can still be fast-forwarded (also ok) If a push comes in after the check, the git-update-ref will undo the ref change made by the push. It's as if the push did not come in, and the next git-push will see this, and try to re-do it. (acceptable) 2. When creating the branch for the very first time, an empty index is created, and a commit of it made to the branch. The commit's ref is recorded as the current state of the index. If a push came in during that, it will be noticed the next time a commit is made to the branch, since the branch will have changed. (ok) 3. Creating the branch from an existing remote branch involves making the branch, and then getting its ref, and recording that the index reflects that ref. If a push creates the branch first, git-branch will fail (ok). If the branch is created and a racing push is then able to change it (highly unlikely!) we're still ok, because it first records the ref into the index.lck, and then updating the index. The race can cause the index.lck to have the old branch ref, while the index has the newly pushed branch merged into it, but that only results in an unnecessary update of the index file later on.
2011-12-11 22:39:53 +00:00
getRef :: Git.Ref -> Annex (Maybe Git.Ref)
getRef branch = process . L.unpack <$> showref
where
showref = inRepo $ Git.pipeRead [Param "show-ref",
Param "--hash", -- get the hash
detect and recover from branch push/commit race Dealing with a race without using locking is exceedingly difficult and tricky. Fully tested, I hope. There are three places left where the branch can be updated, that are not covered by the race recovery code. Let's prove they're all immune to the race: 1. tryFastForwardTo checks to see if a fast-forward can be done, and then does git-update-ref on the branch to fast-forward it. If a push comes in before the check, then either no fast-forward will be done (ok), or the push set the branch to a ref that can still be fast-forwarded (also ok) If a push comes in after the check, the git-update-ref will undo the ref change made by the push. It's as if the push did not come in, and the next git-push will see this, and try to re-do it. (acceptable) 2. When creating the branch for the very first time, an empty index is created, and a commit of it made to the branch. The commit's ref is recorded as the current state of the index. If a push came in during that, it will be noticed the next time a commit is made to the branch, since the branch will have changed. (ok) 3. Creating the branch from an existing remote branch involves making the branch, and then getting its ref, and recording that the index reflects that ref. If a push creates the branch first, git-branch will fail (ok). If the branch is created and a racing push is then able to change it (highly unlikely!) we're still ok, because it first records the ref into the index.lck, and then updating the index. The race can cause the index.lck to have the old branch ref, while the index has the newly pushed branch merged into it, but that only results in an unnecessary update of the index file later on.
2011-12-11 22:39:53 +00:00
Params "--verify", -- only exact match
Param $ show branch]
detect and recover from branch push/commit race Dealing with a race without using locking is exceedingly difficult and tricky. Fully tested, I hope. There are three places left where the branch can be updated, that are not covered by the race recovery code. Let's prove they're all immune to the race: 1. tryFastForwardTo checks to see if a fast-forward can be done, and then does git-update-ref on the branch to fast-forward it. If a push comes in before the check, then either no fast-forward will be done (ok), or the push set the branch to a ref that can still be fast-forwarded (also ok) If a push comes in after the check, the git-update-ref will undo the ref change made by the push. It's as if the push did not come in, and the next git-push will see this, and try to re-do it. (acceptable) 2. When creating the branch for the very first time, an empty index is created, and a commit of it made to the branch. The commit's ref is recorded as the current state of the index. If a push came in during that, it will be noticed the next time a commit is made to the branch, since the branch will have changed. (ok) 3. Creating the branch from an existing remote branch involves making the branch, and then getting its ref, and recording that the index reflects that ref. If a push creates the branch first, git-branch will fail (ok). If the branch is created and a racing push is then able to change it (highly unlikely!) we're still ok, because it first records the ref into the index.lck, and then updating the index. The race can cause the index.lck to have the old branch ref, while the index has the newly pushed branch merged into it, but that only results in an unnecessary update of the index file later on.
2011-12-11 22:39:53 +00:00
process [] = Nothing
process s = Just $ firstRef s
firstRef :: String-> Git.Ref
firstRef = Git.Ref . takeWhile (/= '\n')
{- Applies a function to modifiy the content of a file.
-
- Note that this does not cause the branch to be merged, it only
- modifes the current content of the file on the branch.
-}
change :: FilePath -> (String -> String) -> Annex ()
change file a = lockJournal $ getStale file >>= return . a >>= set file
{- Records new content of a file into the journal. -}
set :: FilePath -> String -> Annex ()
set file content = do
setJournalFile file content
setCache file content
{- Gets the content of a file on the branch, or content from the journal, or
- staged in the index.
-
- Returns an empty string if the file doesn't exist yet. -}
get :: FilePath -> Annex String
get = get' False
{- Like get, but does not merge the branch, so the info returned may not
- reflect changes in remotes. (Changing the value this returns, and then
- merging is always the same as using get, and then changing its value.) -}
getStale :: FilePath -> Annex String
getStale = get' True
get' :: Bool -> FilePath -> Annex String
get' staleok file = fromcache =<< getCache file
2011-10-10 21:37:44 +00:00
where
fromcache (Just content) = return content
fromcache Nothing = fromjournal =<< getJournalFile file
fromjournal (Just content) = cache content
fromjournal Nothing
| staleok = withIndex frombranch
| otherwise = withIndexUpdate $ frombranch >>= cache
frombranch = L.unpack <$> catFile fullname file
2011-10-10 21:37:44 +00:00
cache content = do
setCache file content
return content
{- Lists all files on the branch. There may be duplicates in the list. -}
files :: Annex [FilePath]
files = withIndexUpdate $ do
bfiles <- inRepo $ Git.pipeNullSplit
[Params "ls-tree --name-only -r -z", Param $ show fullname]
jfiles <- getJournalledFiles
return $ jfiles ++ bfiles
{- Records content for a file in the branch to the journal.
-
- Using the journal, rather than immediatly staging content to the index
- avoids git needing to rewrite the index after every change. -}
setJournalFile :: FilePath -> String -> Annex ()
setJournalFile file content = do
g <- gitRepo
liftIO $ doRedo (write g) $ do
createDirectoryIfMissing True $ gitAnnexJournalDir g
createDirectoryIfMissing True $ gitAnnexTmpDir g
where
-- journal file is written atomically
write g = do
let jfile = journalFile g file
let tmpfile = gitAnnexTmpDir g </> takeFileName jfile
writeBinaryFile tmpfile content
moveFile tmpfile jfile
{- Gets any journalled content for a file in the branch. -}
getJournalFile :: FilePath -> Annex (Maybe String)
getJournalFile file = inRepo $ \g -> catchMaybeIO $
readFileStrict $ journalFile g file
{- List of files that have updated content in the journal. -}
getJournalledFiles :: Annex [FilePath]
getJournalledFiles = map fileJournal <$> getJournalFiles
{- List of existing journal files. -}
getJournalFiles :: Annex [FilePath]
getJournalFiles = do
g <- gitRepo
fs <- liftIO $
catchDefaultIO (getDirectoryContents $ gitAnnexJournalDir g) []
2011-09-21 03:26:35 +00:00
return $ filter (`notElem` [".", ".."]) fs
{- Stages the specified journalfiles. -}
stageJournalFiles :: Annex ()
stageJournalFiles = do
fs <- getJournalFiles
g <- gitRepo
withIndex $ liftIO $ do
let dir = gitAnnexJournalDir g
let paths = map (dir </>) fs
-- inject all the journal files directly into git
-- in one quick command
2011-10-10 21:37:44 +00:00
(pid, fromh, toh) <- hPipeBoth "git" $ toCommand $ git_hash_object g
_ <- forkProcess $ do
hPutStr toh $ unlines paths
hClose toh
exitSuccess
hClose toh
shas <- map Git.Ref . lines <$> hGetContents fromh
-- update the index, also in just one command
Git.UnionMerge.update_index g $
index_lines shas (map fileJournal fs)
hClose fromh
forceSuccess pid
mapM_ removeFile paths
where
index_lines shas = map genline . zip shas
genline (sha, file) = Git.UnionMerge.update_index_line sha file
2011-11-11 05:52:58 +00:00
git_hash_object = Git.gitCommandLine
[Param "hash-object", Param "-w", Param "--stdin-paths"]
2011-10-10 21:37:44 +00:00
{- Checks if there are changes in the journal. -}
journalDirty :: Annex Bool
journalDirty = not . null <$> getJournalFiles
{- Produces a filename to use in the journal for a file on the branch.
-
- The journal typically won't have a lot of files in it, so the hashing
- used in the branch is not necessary, and all the files are put directly
- in the journal directory.
-}
journalFile :: Git.Repo -> FilePath -> FilePath
journalFile repo file = gitAnnexJournalDir repo </> concatMap mangle file
where
mangle '/' = "_"
mangle '_' = "__"
mangle c = [c]
{- Converts a journal file (relative to the journal dir) back to the
- filename on the branch. -}
fileJournal :: FilePath -> FilePath
fileJournal = replace "//" "_" . replace "_" "/"
{- Runs an action that modifies the journal, using locking to avoid
- contention with other git-annex processes. -}
lockJournal :: Annex a -> Annex a
lockJournal a = do
2011-11-11 05:52:58 +00:00
file <- fromRepo gitAnnexJournalLock
bracketIO (lock file) unlock a
2011-10-03 22:20:29 +00:00
where
lock file = do
l <- doRedo (createFile file stdFileMode) $
createDirectoryIfMissing True $ takeDirectory file
2011-10-03 22:20:29 +00:00
waitToSetLock l (WriteLock, AbsoluteSeek, 0, 0)
return l
unlock = closeFd
{- Runs an action, catching failure and running something to fix it up, and
- retrying if necessary. -}
doRedo :: IO a -> IO b -> IO a
doRedo a b = catch a $ const $ b >> a