From fcd5c167efbaf409e64e8e56114584f094101fb5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 28 Aug 2013 15:57:42 -0400 Subject: [PATCH] untested transition detection on merging, and transition running code --- Annex/Branch.hs | 112 ++++++++++++++++++++++++++++++++++++++++---- Locations.hs | 5 ++ Logs/Transitions.hs | 30 +++++++----- doc/git-annex.mdwn | 18 +++++++ 4 files changed, 142 insertions(+), 23 deletions(-) diff --git a/Annex/Branch.hs b/Annex/Branch.hs index bc3736a9a6..fa4b0265de 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -1,6 +1,6 @@ {- management of the git-annex branch - - - Copyright 2011-2012 Joey Hess + - Copyright 2011-2013 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -22,9 +22,12 @@ module Annex.Branch ( commit, files, withIndex, + performTransitions, ) where import qualified Data.ByteString.Lazy.Char8 as L +import qualified Data.Set as S +import qualified Data.Map as M import Common.Annex import Annex.BranchState @@ -32,6 +35,7 @@ import Annex.Journal import qualified Git import qualified Git.Command import qualified Git.Ref +import qualified Git.Sha import qualified Git.Branch import qualified Git.UnionMerge import qualified Git.UpdateIndex @@ -42,6 +46,8 @@ import Annex.CatFile import Annex.Perms import qualified Annex import Utility.Env +import Logs.Transitions +import Annex.ReplaceFile {- Name of the branch that is used to store git-annex's information. -} name :: Git.Ref @@ -110,6 +116,9 @@ forceUpdate = updateTo =<< siblingBranches - later get staged, and might overwrite changes made during the merge. - This is only done if some of the Refs do need to be merged. - + - Also handles performing any Transitions that have not yet been + - performed, in either the local branch, or the Refs. + - - Returns True if any refs were merged in, False otherwise. -} updateTo :: [(Git.Ref, Git.Branch)] -> Annex Bool @@ -117,7 +126,8 @@ updateTo pairs = do -- ensure branch exists, and get its current ref branchref <- getBranch dirty <- journalDirty - (refs, branches) <- unzip <$> filterM isnewer pairs + ignoredrefs <- getIgnoredRefs + (refs, branches) <- unzip <$> filterM (isnewer ignoredrefs) pairs if null refs {- Even when no refs need to be merged, the index - may still be updated if the branch has gotten ahead @@ -132,7 +142,9 @@ updateTo pairs = do else lockJournal $ go branchref dirty refs branches return $ not $ null refs where - isnewer (r, _) = inRepo $ Git.Branch.changed fullname r + isnewer ignoredrefs (r, _) + | S.member r ignoredrefs = return False + | otherwise = inRepo $ Git.Branch.changed fullname r go branchref dirty refs branches = withIndex $ do cleanjournal <- if dirty then stageJournal else return noop let merge_desc = if null branches @@ -140,16 +152,23 @@ updateTo pairs = do else "merging " ++ unwords (map Git.Ref.describe branches) ++ " into " ++ show name + localtransitions <- parseTransitionsStrictly "local" + <$> getStale transitionsLog unless (null branches) $ do showSideAction merge_desc mergeIndex refs - ff <- if dirty - then return False - else inRepo $ Git.Branch.fastForward fullname refs - if ff - then updateIndex branchref - else commitBranch branchref merge_desc - (nub $ fullname:refs) + let commitrefs = nub $ fullname:refs + transitioned <- handleTransitions localtransitions commitrefs + case transitioned of + Nothing -> do + ff <- if dirty + then return False + else inRepo $ Git.Branch.fastForward fullname refs + if ff + then updateIndex branchref + else commitBranch branchref merge_desc commitrefs + Just (branchref', commitrefs') -> + commitBranch branchref' merge_desc commitrefs' liftIO cleanjournal {- Gets the content of a file, which may be in the journal, or in the index @@ -361,3 +380,76 @@ stageJournal = withIndex $ do sha <- hashFile h path streamer $ Git.UpdateIndex.updateIndexLine sha FileBlob (asTopFilePath $ fileJournal file) + +{- This is run after the refs have been merged into the index, + - but before the result is committed to the branch. + - Which is why it's passed the contents of the local branches's + - transition log before that merge took place. + - + - When the refs contain transitions that have not yet been done locally, + - the transitions are performed on the index, and a new branch + - is created from the result, and returned. + - + - When there are transitions recorded locally that have not been done + - to the remote refs, the transitions are performed in the index, + - and the existing branch is returned. In this case, the untransitioned + - remote refs cannot be merged into the branch (since transitions + - throw away history), so none of them are included in the returned + - list of refs, and they are added to the list of refs to ignore, + - to avoid re-merging content from them again. + -} +handleTransitions :: Transitions -> [Git.Ref] -> Annex (Maybe (Git.Branch, [Git.Ref])) +handleTransitions localts refs = do + m <- M.fromList <$> mapM getreftransition refs + let remotets = M.elems m + if all (localts ==) remotets + then return Nothing + else do + let allts = combineTransitions (localts:remotets) + let (transitionedrefs, untransitionedrefs) = + partition (\r -> M.lookup r m == Just allts) refs + transitionedbranch <- performTransitions allts (localts /= allts) + ignoreRefs untransitionedrefs + return $ Just (transitionedbranch, transitionedrefs) + where + getreftransition ref = do + ts <- parseTransitionsStrictly "remote" . L.unpack + <$> catFile ref transitionsLog + return (ref, ts) + +ignoreRefs :: [Git.Ref] -> Annex () +ignoreRefs rs = do + old <- getIgnoredRefs + let s = S.unions [old, S.fromList rs] + f <- fromRepo gitAnnexIgnoredRefs + replaceFile f $ \tmp -> liftIO $ writeFile tmp $ + unlines $ map show $ S.elems s + +getIgnoredRefs :: Annex (S.Set Git.Ref) +getIgnoredRefs = S.fromList . mapMaybe Git.Sha.extractSha . lines <$> content + where + content = do + f <- fromRepo gitAnnexIgnoredRefs + liftIO $ catchDefaultIO "" $ readFile f + +{- Performs the specified transitions on the contents of the index file, + - commits it to the branch, or creates a new branch, and returns + - the branch's ref. -} +performTransitions :: Transitions -> Bool -> Annex Git.Ref +performTransitions ts neednewbranch = withIndex $ do + when (inTransitions ForgetDeadRemotes ts) $ + error "TODO ForgetDeadRemotes transition" + if neednewbranch + then do + committedref <- inRepo $ Git.Branch.commit message fullname [] + setIndexSha committedref + return committedref + else do + ref <- getBranch + commitBranch ref message [fullname] + getBranch + where + message + | neednewbranch = "new branch for transition " ++ tdesc + | otherwise = "continuing transition " ++ tdesc + tdesc = show $ map describeTransition $ transitionList ts diff --git a/Locations.hs b/Locations.hs index 1cbbb9886a..7762afb641 100644 --- a/Locations.hs +++ b/Locations.hs @@ -35,6 +35,7 @@ module Locations ( gitAnnexJournalLock, gitAnnexIndex, gitAnnexIndexLock, + gitAnnexIgnoredRefs, gitAnnexPidFile, gitAnnexDaemonStatusFile, gitAnnexLogFile, @@ -225,6 +226,10 @@ gitAnnexIndex r = gitAnnexDir r "index" gitAnnexIndexLock :: Git.Repo -> FilePath gitAnnexIndexLock r = gitAnnexDir r "index.lck" +{- List of refs that should not be merged into the git-annex branch. -} +gitAnnexIgnoredRefs :: Git.Repo -> FilePath +gitAnnexIgnoredRefs r = gitAnnexDir r "ignoredrefs" + {- Pid file for daemon mode. -} gitAnnexPidFile :: Git.Repo -> FilePath gitAnnexPidFile r = gitAnnexDir r "daemon.pid" diff --git a/Logs/Transitions.hs b/Logs/Transitions.hs index e548a2f232..41f4b26353 100644 --- a/Logs/Transitions.hs +++ b/Logs/Transitions.hs @@ -46,6 +46,10 @@ data TransitionLine = TransitionLine type Transitions = S.Set TransitionLine +describeTransition :: Transition -> String +describeTransition ForgetGitHistory = "forget git history" +describeTransition ForgetDeadRemotes = "forget dead remotes" + addTransition :: POSIXTime -> Transition -> Transitions -> Transitions addTransition ts t = S.insert $ TransitionLine ts t @@ -60,6 +64,11 @@ parseTransitions = check . map parseTransitionLine . lines | all isJust l = Just $ S.fromList $ catMaybes l | otherwise = Nothing +parseTransitionsStrictly :: String -> String -> Transitions +parseTransitionsStrictly source = fromMaybe badsource . parseTransitions + where + badsource = error $ "unknown transitions listed in " ++ source ++ "; upgrade git-annex!" + showTransitionLine :: TransitionLine -> String showTransitionLine (TransitionLine ts t) = unwords [show t, show ts] @@ -71,17 +80,14 @@ parseTransitionLine s = TransitionLine <$> pdate ds <*> readish ts ds = unwords $ Prelude.tail ws pdate = parseTime defaultTimeLocale "%s%Qs" >=*> utcTimeToPOSIXSeconds -{- Compares two sets of transitions, and returns a list of any transitions - - from the second set that have not yet been perfomed in the first, - - and a list of any transitions from the first set that have not yet been - - performed in the second. -} -diffTransitions :: Transitions -> Transitions -> ([Transition], [Transition]) -diffTransitions a b = (b `diff` a, a `diff` b) - where - diff x y = map transition $ S.elems $ S.difference x y +combineTransitions :: [Transitions] -> Transitions +combineTransitions = S.unions -sameTransitions :: Transitions -> Transitions -> Bool -sameTransitions a b = let (x, y) = diffTransitions a b in null x && null y +inTransitions :: Transition -> Transitions -> Bool +inTransitions t = not . S.null . S.filter (\l -> transition l == t) + +transitionList :: Transitions -> [Transition] +transitionList = map transition . S.elems {- Typically ran with Annex.Branch.change, but we can't import Annex.Branch - here since it depends on this module. -} @@ -89,6 +95,4 @@ recordTransition :: (FilePath -> (String -> String) -> Annex ()) -> Transition - recordTransition changer o = do t <- liftIO getPOSIXTime changer transitionsLog $ - showTransitions . addTransition t o . fromMaybe badlog . parseTransitions - where - badlog = error $ "unknown transitions exist in " ++ transitionsLog + showTransitions . addTransition t o . parseTransitionsStrictly "local" diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 7cac9087d1..72e376d649 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -479,6 +479,24 @@ subdirectories). Upgrades the repository to current layout. +* forget + + Causes the git-annex branch to be rewritten, throwing away historical + data about past locations of files, files that are no longer present on + any remote, etc. The resulting branch will use less space, but for + example `git annex log` will not be able to show where files used to + be located. + + To also prune references to remotes that have been marked as dead, + specify --forget-dead. + + When this rewritten branch is merged into other clones of + the repository, git-annex will automatically perform the same rewriting + to their local git-annex branch. So the forgetfulness will automatically + propigate out from its starting point until all repositories running + git-annex have forgotten their old history. (You may need to force + git to push the branch to any git repositories not running git-annex. + # QUERY COMMANDS * version