From 511cf77b6dce10f965a0c4cf81da371a7133cc72 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 28 Aug 2013 13:19:02 -0400 Subject: [PATCH 1/9] add transition log --- Logs/Transitions.hs | 94 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 Logs/Transitions.hs diff --git a/Logs/Transitions.hs b/Logs/Transitions.hs new file mode 100644 index 0000000000..e548a2f232 --- /dev/null +++ b/Logs/Transitions.hs @@ -0,0 +1,94 @@ +{- git-annex transitions log + - + - This is used to record transitions that have been performed on the + - git-annex branch, and when the transition was first started. + - + - We can quickly detect when the local branch has already had an transition + - done that is listed in the remote branch by checking that the local + - branch contains the same transition, with the same or newer start time. + - + - When a remote branch that has had an transition performed on it + - becomes available for merging into the local git-annex branch, + - the transition is first performed on the local branch. + - + - When merging a remote branch into the local git-annex branch, + - all transitions that have been performed on the local branch must also + - have been performed on the remote branch already. (Or it would be + - possible to perform the transitions on a fixup branch and merge it, + - but that would be expensive.) + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.Transitions where + +import Data.Time.Clock.POSIX +import Data.Time +import System.Locale +import qualified Data.Set as S + +import Common.Annex + +transitionsLog :: FilePath +transitionsLog = "transitions.log" + +data Transition + = ForgetGitHistory + | ForgetDeadRemotes + deriving (Show, Ord, Eq, Read) + +data TransitionLine = TransitionLine + { transitionStarted :: POSIXTime + , transition :: Transition + } deriving (Show, Ord, Eq) + +type Transitions = S.Set TransitionLine + +addTransition :: POSIXTime -> Transition -> Transitions -> Transitions +addTransition ts t = S.insert $ TransitionLine ts t + +showTransitions :: Transitions -> String +showTransitions = unlines . map showTransitionLine . S.elems + +{- If the log contains new transitions we don't support, returns Nothing. -} +parseTransitions :: String -> Maybe Transitions +parseTransitions = check . map parseTransitionLine . lines + where + check l + | all isJust l = Just $ S.fromList $ catMaybes l + | otherwise = Nothing + +showTransitionLine :: TransitionLine -> String +showTransitionLine (TransitionLine ts t) = unwords [show t, show ts] + +parseTransitionLine :: String -> Maybe TransitionLine +parseTransitionLine s = TransitionLine <$> pdate ds <*> readish ts + where + ws = words s + ts = Prelude.head ws + ds = unwords $ Prelude.tail ws + pdate = parseTime defaultTimeLocale "%s%Qs" >=*> utcTimeToPOSIXSeconds + +{- Compares two sets of transitions, and returns a list of any transitions + - from the second set that have not yet been perfomed in the first, + - and a list of any transitions from the first set that have not yet been + - performed in the second. -} +diffTransitions :: Transitions -> Transitions -> ([Transition], [Transition]) +diffTransitions a b = (b `diff` a, a `diff` b) + where + diff x y = map transition $ S.elems $ S.difference x y + +sameTransitions :: Transitions -> Transitions -> Bool +sameTransitions a b = let (x, y) = diffTransitions a b in null x && null y + +{- Typically ran with Annex.Branch.change, but we can't import Annex.Branch + - here since it depends on this module. -} +recordTransition :: (FilePath -> (String -> String) -> Annex ()) -> Transition -> Annex () +recordTransition changer o = do + t <- liftIO getPOSIXTime + changer transitionsLog $ + showTransitions . addTransition t o . fromMaybe badlog . parseTransitions + where + badlog = error $ "unknown transitions exist in " ++ transitionsLog From fcd5c167efbaf409e64e8e56114584f094101fb5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 28 Aug 2013 15:57:42 -0400 Subject: [PATCH 2/9] untested transition detection on merging, and transition running code --- Annex/Branch.hs | 112 ++++++++++++++++++++++++++++++++++++++++---- Locations.hs | 5 ++ Logs/Transitions.hs | 30 +++++++----- doc/git-annex.mdwn | 18 +++++++ 4 files changed, 142 insertions(+), 23 deletions(-) diff --git a/Annex/Branch.hs b/Annex/Branch.hs index bc3736a9a6..fa4b0265de 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -1,6 +1,6 @@ {- management of the git-annex branch - - - Copyright 2011-2012 Joey Hess + - Copyright 2011-2013 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -22,9 +22,12 @@ module Annex.Branch ( commit, files, withIndex, + performTransitions, ) where import qualified Data.ByteString.Lazy.Char8 as L +import qualified Data.Set as S +import qualified Data.Map as M import Common.Annex import Annex.BranchState @@ -32,6 +35,7 @@ import Annex.Journal import qualified Git import qualified Git.Command import qualified Git.Ref +import qualified Git.Sha import qualified Git.Branch import qualified Git.UnionMerge import qualified Git.UpdateIndex @@ -42,6 +46,8 @@ import Annex.CatFile import Annex.Perms import qualified Annex import Utility.Env +import Logs.Transitions +import Annex.ReplaceFile {- Name of the branch that is used to store git-annex's information. -} name :: Git.Ref @@ -110,6 +116,9 @@ forceUpdate = updateTo =<< siblingBranches - later get staged, and might overwrite changes made during the merge. - This is only done if some of the Refs do need to be merged. - + - Also handles performing any Transitions that have not yet been + - performed, in either the local branch, or the Refs. + - - Returns True if any refs were merged in, False otherwise. -} updateTo :: [(Git.Ref, Git.Branch)] -> Annex Bool @@ -117,7 +126,8 @@ updateTo pairs = do -- ensure branch exists, and get its current ref branchref <- getBranch dirty <- journalDirty - (refs, branches) <- unzip <$> filterM isnewer pairs + ignoredrefs <- getIgnoredRefs + (refs, branches) <- unzip <$> filterM (isnewer ignoredrefs) pairs if null refs {- Even when no refs need to be merged, the index - may still be updated if the branch has gotten ahead @@ -132,7 +142,9 @@ updateTo pairs = do else lockJournal $ go branchref dirty refs branches return $ not $ null refs where - isnewer (r, _) = inRepo $ Git.Branch.changed fullname r + isnewer ignoredrefs (r, _) + | S.member r ignoredrefs = return False + | otherwise = inRepo $ Git.Branch.changed fullname r go branchref dirty refs branches = withIndex $ do cleanjournal <- if dirty then stageJournal else return noop let merge_desc = if null branches @@ -140,16 +152,23 @@ updateTo pairs = do else "merging " ++ unwords (map Git.Ref.describe branches) ++ " into " ++ show name + localtransitions <- parseTransitionsStrictly "local" + <$> getStale transitionsLog unless (null branches) $ do showSideAction merge_desc mergeIndex refs - ff <- if dirty - then return False - else inRepo $ Git.Branch.fastForward fullname refs - if ff - then updateIndex branchref - else commitBranch branchref merge_desc - (nub $ fullname:refs) + let commitrefs = nub $ fullname:refs + transitioned <- handleTransitions localtransitions commitrefs + case transitioned of + Nothing -> do + ff <- if dirty + then return False + else inRepo $ Git.Branch.fastForward fullname refs + if ff + then updateIndex branchref + else commitBranch branchref merge_desc commitrefs + Just (branchref', commitrefs') -> + commitBranch branchref' merge_desc commitrefs' liftIO cleanjournal {- Gets the content of a file, which may be in the journal, or in the index @@ -361,3 +380,76 @@ stageJournal = withIndex $ do sha <- hashFile h path streamer $ Git.UpdateIndex.updateIndexLine sha FileBlob (asTopFilePath $ fileJournal file) + +{- This is run after the refs have been merged into the index, + - but before the result is committed to the branch. + - Which is why it's passed the contents of the local branches's + - transition log before that merge took place. + - + - When the refs contain transitions that have not yet been done locally, + - the transitions are performed on the index, and a new branch + - is created from the result, and returned. + - + - When there are transitions recorded locally that have not been done + - to the remote refs, the transitions are performed in the index, + - and the existing branch is returned. In this case, the untransitioned + - remote refs cannot be merged into the branch (since transitions + - throw away history), so none of them are included in the returned + - list of refs, and they are added to the list of refs to ignore, + - to avoid re-merging content from them again. + -} +handleTransitions :: Transitions -> [Git.Ref] -> Annex (Maybe (Git.Branch, [Git.Ref])) +handleTransitions localts refs = do + m <- M.fromList <$> mapM getreftransition refs + let remotets = M.elems m + if all (localts ==) remotets + then return Nothing + else do + let allts = combineTransitions (localts:remotets) + let (transitionedrefs, untransitionedrefs) = + partition (\r -> M.lookup r m == Just allts) refs + transitionedbranch <- performTransitions allts (localts /= allts) + ignoreRefs untransitionedrefs + return $ Just (transitionedbranch, transitionedrefs) + where + getreftransition ref = do + ts <- parseTransitionsStrictly "remote" . L.unpack + <$> catFile ref transitionsLog + return (ref, ts) + +ignoreRefs :: [Git.Ref] -> Annex () +ignoreRefs rs = do + old <- getIgnoredRefs + let s = S.unions [old, S.fromList rs] + f <- fromRepo gitAnnexIgnoredRefs + replaceFile f $ \tmp -> liftIO $ writeFile tmp $ + unlines $ map show $ S.elems s + +getIgnoredRefs :: Annex (S.Set Git.Ref) +getIgnoredRefs = S.fromList . mapMaybe Git.Sha.extractSha . lines <$> content + where + content = do + f <- fromRepo gitAnnexIgnoredRefs + liftIO $ catchDefaultIO "" $ readFile f + +{- Performs the specified transitions on the contents of the index file, + - commits it to the branch, or creates a new branch, and returns + - the branch's ref. -} +performTransitions :: Transitions -> Bool -> Annex Git.Ref +performTransitions ts neednewbranch = withIndex $ do + when (inTransitions ForgetDeadRemotes ts) $ + error "TODO ForgetDeadRemotes transition" + if neednewbranch + then do + committedref <- inRepo $ Git.Branch.commit message fullname [] + setIndexSha committedref + return committedref + else do + ref <- getBranch + commitBranch ref message [fullname] + getBranch + where + message + | neednewbranch = "new branch for transition " ++ tdesc + | otherwise = "continuing transition " ++ tdesc + tdesc = show $ map describeTransition $ transitionList ts diff --git a/Locations.hs b/Locations.hs index 1cbbb9886a..7762afb641 100644 --- a/Locations.hs +++ b/Locations.hs @@ -35,6 +35,7 @@ module Locations ( gitAnnexJournalLock, gitAnnexIndex, gitAnnexIndexLock, + gitAnnexIgnoredRefs, gitAnnexPidFile, gitAnnexDaemonStatusFile, gitAnnexLogFile, @@ -225,6 +226,10 @@ gitAnnexIndex r = gitAnnexDir r "index" gitAnnexIndexLock :: Git.Repo -> FilePath gitAnnexIndexLock r = gitAnnexDir r "index.lck" +{- List of refs that should not be merged into the git-annex branch. -} +gitAnnexIgnoredRefs :: Git.Repo -> FilePath +gitAnnexIgnoredRefs r = gitAnnexDir r "ignoredrefs" + {- Pid file for daemon mode. -} gitAnnexPidFile :: Git.Repo -> FilePath gitAnnexPidFile r = gitAnnexDir r "daemon.pid" diff --git a/Logs/Transitions.hs b/Logs/Transitions.hs index e548a2f232..41f4b26353 100644 --- a/Logs/Transitions.hs +++ b/Logs/Transitions.hs @@ -46,6 +46,10 @@ data TransitionLine = TransitionLine type Transitions = S.Set TransitionLine +describeTransition :: Transition -> String +describeTransition ForgetGitHistory = "forget git history" +describeTransition ForgetDeadRemotes = "forget dead remotes" + addTransition :: POSIXTime -> Transition -> Transitions -> Transitions addTransition ts t = S.insert $ TransitionLine ts t @@ -60,6 +64,11 @@ parseTransitions = check . map parseTransitionLine . lines | all isJust l = Just $ S.fromList $ catMaybes l | otherwise = Nothing +parseTransitionsStrictly :: String -> String -> Transitions +parseTransitionsStrictly source = fromMaybe badsource . parseTransitions + where + badsource = error $ "unknown transitions listed in " ++ source ++ "; upgrade git-annex!" + showTransitionLine :: TransitionLine -> String showTransitionLine (TransitionLine ts t) = unwords [show t, show ts] @@ -71,17 +80,14 @@ parseTransitionLine s = TransitionLine <$> pdate ds <*> readish ts ds = unwords $ Prelude.tail ws pdate = parseTime defaultTimeLocale "%s%Qs" >=*> utcTimeToPOSIXSeconds -{- Compares two sets of transitions, and returns a list of any transitions - - from the second set that have not yet been perfomed in the first, - - and a list of any transitions from the first set that have not yet been - - performed in the second. -} -diffTransitions :: Transitions -> Transitions -> ([Transition], [Transition]) -diffTransitions a b = (b `diff` a, a `diff` b) - where - diff x y = map transition $ S.elems $ S.difference x y +combineTransitions :: [Transitions] -> Transitions +combineTransitions = S.unions -sameTransitions :: Transitions -> Transitions -> Bool -sameTransitions a b = let (x, y) = diffTransitions a b in null x && null y +inTransitions :: Transition -> Transitions -> Bool +inTransitions t = not . S.null . S.filter (\l -> transition l == t) + +transitionList :: Transitions -> [Transition] +transitionList = map transition . S.elems {- Typically ran with Annex.Branch.change, but we can't import Annex.Branch - here since it depends on this module. -} @@ -89,6 +95,4 @@ recordTransition :: (FilePath -> (String -> String) -> Annex ()) -> Transition - recordTransition changer o = do t <- liftIO getPOSIXTime changer transitionsLog $ - showTransitions . addTransition t o . fromMaybe badlog . parseTransitions - where - badlog = error $ "unknown transitions exist in " ++ transitionsLog + showTransitions . addTransition t o . parseTransitionsStrictly "local" diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 7cac9087d1..72e376d649 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -479,6 +479,24 @@ subdirectories). Upgrades the repository to current layout. +* forget + + Causes the git-annex branch to be rewritten, throwing away historical + data about past locations of files, files that are no longer present on + any remote, etc. The resulting branch will use less space, but for + example `git annex log` will not be able to show where files used to + be located. + + To also prune references to remotes that have been marked as dead, + specify --forget-dead. + + When this rewritten branch is merged into other clones of + the repository, git-annex will automatically perform the same rewriting + to their local git-annex branch. So the forgetfulness will automatically + propigate out from its starting point until all repositories running + git-annex have forgotten their old history. (You may need to force + git to push the branch to any git repositories not running git-annex. + # QUERY COMMANDS * version From 4a915cd3cdf5757637c9c8d0b43e1c208524ddb9 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 28 Aug 2013 16:38:03 -0400 Subject: [PATCH 3/9] add forget command Works, more or less. --dead is not implemented, and so far a new branch is made, but keys no longer present anywhere are not scrubbed. git annex sync fails to push the synced/git-annex branch after a forget, because it's not a fast-forward of the existing synced branch. Could be fixed by making git-annex sync use assistant-style sync branches. --- Annex/Branch.hs | 1 + Command/Forget.hs | 41 +++++++++++++++++++++++++++++++++++++++++ GitAnnex.hs | 2 ++ Logs/Transitions.hs | 10 ++++++---- doc/git-annex.mdwn | 2 +- 5 files changed, 51 insertions(+), 5 deletions(-) create mode 100644 Command/Forget.hs diff --git a/Annex/Branch.hs b/Annex/Branch.hs index fa4b0265de..5af6b6be97 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -402,6 +402,7 @@ handleTransitions :: Transitions -> [Git.Ref] -> Annex (Maybe (Git.Branch, [Git. handleTransitions localts refs = do m <- M.fromList <$> mapM getreftransition refs let remotets = M.elems m + liftIO $ print ("transitions", localts, remotets) if all (localts ==) remotets then return Nothing else do diff --git a/Command/Forget.hs b/Command/Forget.hs new file mode 100644 index 0000000000..e405a99181 --- /dev/null +++ b/Command/Forget.hs @@ -0,0 +1,41 @@ +{- git-annex command + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Forget where + +import Common.Annex +import Command +import qualified Annex.Branch as Branch +import Logs.Transitions +import qualified Annex + +import Data.Time.Clock.POSIX + +def :: [Command] +def = [command "forget" paramNothing seek + SectionMaintenance "prune git-annex branch history"] + +seek :: [CommandSeek] +seek = [withNothing start] + +start :: CommandStart +start = do + showStart "forget" "git-annex" + next $ perform =<< Annex.getState Annex.force + +perform :: Bool -> CommandPerform +perform True = do + now <- liftIO getPOSIXTime + let ts = addTransition now ForgetGitHistory noTransitions + recordTransitions Branch.change ts + -- get branch committed before contining with the transition + Branch.update + void $ Branch.performTransitions ts True + next $ return True +perform False = do + showLongNote "To forget git-annex branch history, you must specify --force. This deletes metadata!" + stop diff --git a/GitAnnex.hs b/GitAnnex.hs index 05565e643b..1212edf9fe 100644 --- a/GitAnnex.hs +++ b/GitAnnex.hs @@ -67,6 +67,7 @@ import qualified Command.Map import qualified Command.Direct import qualified Command.Indirect import qualified Command.Upgrade +import qualified Command.Forget import qualified Command.Version import qualified Command.Help #ifdef WITH_ASSISTANT @@ -139,6 +140,7 @@ cmds = concat , Command.Direct.def , Command.Indirect.def , Command.Upgrade.def + , Command.Forget.def , Command.Version.def , Command.Help.def #ifdef WITH_ASSISTANT diff --git a/Logs/Transitions.hs b/Logs/Transitions.hs index 41f4b26353..d4b7d5eb35 100644 --- a/Logs/Transitions.hs +++ b/Logs/Transitions.hs @@ -50,6 +50,9 @@ describeTransition :: Transition -> String describeTransition ForgetGitHistory = "forget git history" describeTransition ForgetDeadRemotes = "forget dead remotes" +noTransitions :: Transitions +noTransitions = S.empty + addTransition :: POSIXTime -> Transition -> Transitions -> Transitions addTransition ts t = S.insert $ TransitionLine ts t @@ -91,8 +94,7 @@ transitionList = map transition . S.elems {- Typically ran with Annex.Branch.change, but we can't import Annex.Branch - here since it depends on this module. -} -recordTransition :: (FilePath -> (String -> String) -> Annex ()) -> Transition -> Annex () -recordTransition changer o = do - t <- liftIO getPOSIXTime +recordTransitions :: (FilePath -> (String -> String) -> Annex ()) -> Transitions -> Annex () +recordTransitions changer t = do changer transitionsLog $ - showTransitions . addTransition t o . parseTransitionsStrictly "local" + showTransitions . S.union t . parseTransitionsStrictly "local" diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 72e376d649..5fb0ce5a4d 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -488,7 +488,7 @@ subdirectories). be located. To also prune references to remotes that have been marked as dead, - specify --forget-dead. + specify --dead. When this rewritten branch is merged into other clones of the repository, git-annex will automatically perform the same rewriting From c181efe43786cc913385c931cf2ef8c34588dc36 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 29 Aug 2013 13:31:29 -0400 Subject: [PATCH 4/9] use --force in taggedPush This should make the assistant force update its tagged push branch after a transition like git annex forget. --- Annex/TaggedPush.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Annex/TaggedPush.hs b/Annex/TaggedPush.hs index 44a1a0eb0e..2a5f823fd3 100644 --- a/Annex/TaggedPush.hs +++ b/Annex/TaggedPush.hs @@ -49,6 +49,10 @@ fromTaggedBranch b = case split "/" $ show b of taggedPush :: UUID -> Maybe String -> Git.Ref -> Remote -> Git.Repo -> IO Bool taggedPush u info branch remote = Git.Command.runBool [ Param "push" + -- This is safe because we "own" the tagged branch we're pushing; + -- it has no other writers. Ensures it is pushed even if it has + -- been rewritten by a transition. + , Param "--force" , Param $ Remote.name remote , Param $ refspec Annex.Branch.name , Param $ refspec branch From 6cdac3a003b6850fd96a60d94320d084d8651096 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 29 Aug 2013 14:15:32 -0400 Subject: [PATCH 5/9] sync, assistant: Force push of the git-annex branch. Necessary to ensure it gets pushed to remotes after being rewritten by forget. See inline rationalles for why I think this is safe! --- Annex/TaggedPush.hs | 10 +++++----- Command/Sync.hs | 42 +++++++++++++++++++++++++++++------------- Git/Branch.hs | 4 ++++ debian/changelog | 10 ++++++++++ 4 files changed, 48 insertions(+), 18 deletions(-) diff --git a/Annex/TaggedPush.hs b/Annex/TaggedPush.hs index 2a5f823fd3..039dc0e173 100644 --- a/Annex/TaggedPush.hs +++ b/Annex/TaggedPush.hs @@ -13,6 +13,7 @@ import qualified Annex.Branch import qualified Git import qualified Git.Ref import qualified Git.Command +import qualified Git.Branch import Utility.Base64 {- Converts a git branch into a branch that is tagged with a UUID, typically @@ -49,12 +50,11 @@ fromTaggedBranch b = case split "/" $ show b of taggedPush :: UUID -> Maybe String -> Git.Ref -> Remote -> Git.Repo -> IO Bool taggedPush u info branch remote = Git.Command.runBool [ Param "push" - -- This is safe because we "own" the tagged branch we're pushing; - -- it has no other writers. Ensures it is pushed even if it has - -- been rewritten by a transition. - , Param "--force" , Param $ Remote.name remote - , Param $ refspec Annex.Branch.name + {- Using forcePush here is safe because we "own" the tagged branch + - we're pushing; it has no other writers. Ensures it is pushed + - even if it has been rewritten by a transition. -} + , Param $ Git.Branch.forcePush $ refspec Annex.Branch.name , Param $ refspec branch ] where diff --git a/Command/Sync.hs b/Command/Sync.hs index 551c2fa694..567e3146bb 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -167,29 +167,45 @@ pushRemote remote branch = go =<< needpush showOutput inRepo $ pushBranch remote branch -{- If the remote is a bare git repository, it's best to push the branch - - directly to it. On the other hand, if it's not bare, pushing to the - - checked out branch will fail, and this is why we use the syncBranch. +{- Pushes a regular branch like master to a remote. Also pushes the git-annex + - branch. + - + - If the remote is a bare git repository, it's best to push the regular + - branch directly to it, so that cloning/pulling will get it. + - On the other hand, if it's not bare, pushing to the checked out branch + - will fail, and this is why we push to its syncBranch. - - Git offers no way to tell if a remote is bare or not, so both methods - are tried. - - The direct push is likely to spew an ugly error message, so stderr is - - elided. Since progress is output to stderr too, the sync push is done - - first, and actually sends the data. Then the direct push is tried, - - with stderr discarded, to update the branch ref on the remote. + - elided. Since git progress display goes to stderr too, the sync push + - is done first, and actually sends the data. Then the direct push is + - tried, with stderr discarded, to update the branch ref on the remote. + - + - The sync push forces the update of the remote synced/git-annex branch. + - This is necessary if a transition has rewritten the git-annex branch. + - Normally any changes to the git-annex branch get pulled and merged before + - this push, so this forcing is unlikely to overwrite new data pushed + - in from another repository that is also syncing. + - + - But overwriting of data on synced/git-annex can happen, in a race. + - The only difference caused by using a forced push in that case is that + - the last repository to push wins the race, rather than the first to push. -} pushBranch :: Remote -> Git.Ref -> Git.Repo -> IO Bool -pushBranch remote branch g = tryIO directpush `after` syncpush +pushBranch remote branch g = tryIO (directpush g) `after` syncpush g where - syncpush = Git.Command.runBool (pushparams (refspec branch)) g - directpush = Git.Command.runQuiet (pushparams (show $ Git.Ref.base branch)) g - pushparams b = + syncpush = Git.Command.runBool $ pushparams + [ Git.Branch.forcePush $ refspec Annex.Branch.name + , refspec branch + ] + directpush = Git.Command.runQuiet $ pushparams + [show $ Git.Ref.base branch] + pushparams branches = [ Param "push" , Param $ Remote.name remote - , Param $ refspec Annex.Branch.name - , Param b - ] + ] ++ map Param branches refspec b = concat [ show $ Git.Ref.base b , ":" diff --git a/Git/Branch.hs b/Git/Branch.hs index d4a6840165..fed53d767e 100644 --- a/Git/Branch.hs +++ b/Git/Branch.hs @@ -101,3 +101,7 @@ commit message branch parentrefs repo = do return sha where ps = concatMap (\r -> ["-p", show r]) parentrefs + +{- A leading + makes git-push force pushing a branch. -} +forcePush :: String -> String +forcePush b = "+" ++ b diff --git a/debian/changelog b/debian/changelog index 68ba98b8bb..cb6fbee5a9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,13 @@ +git-annex (4.20130828) UNRELEASED; urgency=low + + * forget: New command, causes git-annex branch history to be forgotten + in a way that will spread to other clones of the repository. + (As long as they're running this version or newer of git-annex.) + * sync, assistant: Force push of the git-annex branch. Necessary + to ensure it gets pushed to remotes after being rewritten by forget. + + -- Joey Hess Tue, 27 Aug 2013 11:03:00 -0400 + git-annex (4.20130827) unstable; urgency=low * Youtube support! (And 53 other video hosts). When quvi is installed, From 6147652cc6342f5d8d39cb3aba3a50d35d352ed0 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 29 Aug 2013 16:41:59 -0400 Subject: [PATCH 6/9] wording --- Annex/Branch.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 5af6b6be97..13e1f11eb8 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -175,7 +175,7 @@ updateTo pairs = do - (and committed to the branch). - - Updates the branch if necessary, to ensure the most up-to-date available - - content is available. + - content is returned. - - Returns an empty string if the file doesn't exist yet. -} get :: FilePath -> Annex String From 62beaa1a86773ce5fa3f9bc60d447c129ecb15b5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 29 Aug 2013 18:51:22 -0400 Subject: [PATCH 7/9] refactor git-annex branch log filename code into central location Having one module that knows about all the filenames used on the branch allows working back from an arbitrary filename to enough information about it to implement dropping dead remotes and doing other log file compacting as part of a forget transition. --- Command/Log.hs | 4 +- Logs.hs | 110 +++++++++++++++++++++++++++++++++++++++ Logs/Group.hs | 5 +- Logs/Location.hs | 21 ++------ Logs/PreferredContent.hs | 5 +- Logs/Remote.hs | 5 +- Logs/Trust.hs | 5 +- Logs/UUID.hs | 5 +- Logs/Web.hs | 36 ++----------- Test.hs | 2 + Upgrade/V2.hs | 6 +-- Utility/Misc.hs | 6 +++ 12 files changed, 136 insertions(+), 74 deletions(-) create mode 100644 Logs.hs diff --git a/Command/Log.hs b/Command/Log.hs index 2d4819f7ff..f3a5becb8a 100644 --- a/Command/Log.hs +++ b/Command/Log.hs @@ -17,7 +17,7 @@ import Data.Char import Common.Annex import Command -import qualified Logs.Location +import Logs import qualified Logs.Presence import Annex.CatFile import qualified Annex.Branch @@ -135,7 +135,7 @@ getLog :: Key -> [CommandParam] -> Annex [String] getLog key os = do top <- fromRepo Git.repoPath p <- liftIO $ relPathCwdToFile top - let logfile = p Logs.Location.logFile key + let logfile = p locationLogFile key inRepo $ pipeNullSplitZombie $ [ Params "log -z --pretty=format:%ct --raw --abbrev=40" , Param "--remove-empty" diff --git a/Logs.hs b/Logs.hs new file mode 100644 index 0000000000..6339efa135 --- /dev/null +++ b/Logs.hs @@ -0,0 +1,110 @@ +{- git-annex log file names + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs where + +import Common.Annex +import Types.Key + +data LogVariety = UUIDBasedLog | PresenceLog Key + deriving (Show) + +{- Converts a path from the git-annex branch into one of the varieties + - of logs used by git-annex, if it's a known path. -} +getLogVariety :: FilePath -> Maybe LogVariety +getLogVariety f + | f `elem` uuidBasedLogs = Just UUIDBasedLog + | otherwise = PresenceLog <$> firstJust (presenceLogs f) + +{- All the uuid-based logs stored in the git-annex branch. -} +uuidBasedLogs :: [FilePath] +uuidBasedLogs = + [ uuidLog + , remoteLog + , trustLog + , groupLog + , preferredContentLog + ] + +{- All the ways to get a key from a presence log file -} +presenceLogs :: FilePath -> [Maybe Key] +presenceLogs f = + [ urlLogFileKey f + , locationLogFileKey f + ] + +uuidLog :: FilePath +uuidLog = "uuid.log" + +remoteLog :: FilePath +remoteLog = "remote.log" + +trustLog :: FilePath +trustLog = "trust.log" + +groupLog :: FilePath +groupLog = "group.log" + +preferredContentLog :: FilePath +preferredContentLog = "preferred-content.log" + +{- The pathname of the location log file for a given key. -} +locationLogFile :: Key -> String +locationLogFile key = hashDirLower key ++ keyFile key ++ ".log" + +{- Converts a pathname into a key if it's a location log. -} +locationLogFileKey :: FilePath -> Maybe Key +locationLogFileKey path + | ["remote", "web"] `isPrefixOf` splitDirectories dir = Nothing + | ext == ".log" = fileKey base + | otherwise = Nothing + where + (dir, file) = splitFileName path + (base, ext) = splitAt (length file - 4) file + +{- The filename of the url log for a given key. -} +urlLogFile :: Key -> FilePath +urlLogFile key = hashDirLower key keyFile key ++ urlLogExt + +{- Old versions stored the urls elsewhere. -} +oldurlLogs :: Key -> [FilePath] +oldurlLogs key = + [ "remote/web" hashDirLower key key2file key ++ ".log" + , "remote/web" hashDirLower key keyFile key ++ ".log" + ] + +urlLogExt :: String +urlLogExt = ".log.web" + +{- Converts a url log file into a key. + - (Does not work on oldurlLogs.) -} +urlLogFileKey :: FilePath -> Maybe Key +urlLogFileKey path + | ext == urlLogExt = fileKey base + | otherwise = Nothing + where + file = takeFileName path + (base, ext) = splitAt (length file - extlen) file + extlen = length urlLogExt + +{- Does not work on oldurllogs. -} +isUrlLog :: FilePath -> Bool +isUrlLog file = urlLogExt `isSuffixOf` file + +prop_logs_sane :: Key -> Bool +prop_logs_sane dummykey = all id + [ isNothing (getLogVariety "unknown") + , expect isUUIDBasedLog (getLogVariety uuidLog) + , expect isPresenceLog (getLogVariety $ locationLogFile dummykey) + , expect isPresenceLog (getLogVariety $ urlLogFile dummykey) + ] + where + expect = maybe False + isUUIDBasedLog UUIDBasedLog = True + isUUIDBasedLog _ = False + isPresenceLog (PresenceLog k) = k == dummykey + isPresenceLog _ = False diff --git a/Logs/Group.hs b/Logs/Group.hs index ee3b75b860..3f88b627d3 100644 --- a/Logs/Group.hs +++ b/Logs/Group.hs @@ -21,16 +21,13 @@ import qualified Data.Set as S import Data.Time.Clock.POSIX import Common.Annex +import Logs import qualified Annex.Branch import qualified Annex import Logs.UUIDBased import Types.Group import Types.StandardGroups -{- Filename of group.log. -} -groupLog :: FilePath -groupLog = "group.log" - {- Returns the groups of a given repo UUID. -} lookupGroups :: UUID -> Annex (S.Set Group) lookupGroups u = (fromMaybe S.empty . M.lookup u) . groupsByUUID <$> groupMap diff --git a/Logs/Location.hs b/Logs/Location.hs index 0f57b66634..1289af3216 100644 --- a/Logs/Location.hs +++ b/Logs/Location.hs @@ -20,12 +20,11 @@ module Logs.Location ( loggedLocations, loggedKeys, loggedKeysFor, - logFile, - logFileKey ) where import Common.Annex import qualified Annex.Branch +import Logs import Logs.Presence import Annex.UUID @@ -37,19 +36,19 @@ logStatus key status = do {- Log a change in the presence of a key's value in a repository. -} logChange :: Key -> UUID -> LogStatus -> Annex () -logChange key (UUID u) s = addLog (logFile key) =<< logNow s u +logChange key (UUID u) s = addLog (locationLogFile key) =<< logNow s u logChange _ NoUUID _ = noop {- Returns a list of repository UUIDs that, according to the log, have - the value of a key. -} loggedLocations :: Key -> Annex [UUID] -loggedLocations key = map toUUID <$> (currentLog . logFile) key +loggedLocations key = map toUUID <$> (currentLog . locationLogFile) key {- Finds all keys that have location log information. - (There may be duplicate keys in the list.) -} loggedKeys :: Annex [Key] -loggedKeys = mapMaybe (logFileKey . takeFileName) <$> Annex.Branch.files +loggedKeys = mapMaybe locationLogFileKey <$> Annex.Branch.files {- Finds all keys that have location log information indicating - they are present for the specified repository. -} @@ -62,15 +61,3 @@ loggedKeysFor u = filterM isthere =<< loggedKeys us <- loggedLocations k let !there = u `elem` us return there - -{- The filename of the log file for a given key. -} -logFile :: Key -> String -logFile key = hashDirLower key ++ keyFile key ++ ".log" - -{- Converts a log filename into a key. -} -logFileKey :: FilePath -> Maybe Key -logFileKey file - | ext == ".log" = fileKey base - | otherwise = Nothing - where - (base, ext) = splitAt (length file - 4) file diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs index 8005fc0d30..947a31875f 100644 --- a/Logs/PreferredContent.hs +++ b/Logs/PreferredContent.hs @@ -24,6 +24,7 @@ import Data.Time.Clock.POSIX import Common.Annex import qualified Annex.Branch import qualified Annex +import Logs import Logs.UUIDBased import Limit import qualified Utility.Matcher @@ -35,10 +36,6 @@ import Logs.Group import Logs.Remote import Types.StandardGroups -{- Filename of preferred-content.log. -} -preferredContentLog :: FilePath -preferredContentLog = "preferred-content.log" - {- Changes the preferred content configuration of a remote. -} preferredContentSet :: UUID -> String -> Annex () preferredContentSet uuid@(UUID _) val = do diff --git a/Logs/Remote.hs b/Logs/Remote.hs index 89792b0545..48ee9eb60b 100644 --- a/Logs/Remote.hs +++ b/Logs/Remote.hs @@ -25,12 +25,9 @@ import Data.Char import Common.Annex import qualified Annex.Branch import Types.Remote +import Logs import Logs.UUIDBased -{- Filename of remote.log. -} -remoteLog :: FilePath -remoteLog = "remote.log" - {- Adds or updates a remote's config in the log. -} configSet :: UUID -> RemoteConfig -> Annex () configSet u c = do diff --git a/Logs/Trust.hs b/Logs/Trust.hs index eb6e42ad7e..6c6b33f70e 100644 --- a/Logs/Trust.hs +++ b/Logs/Trust.hs @@ -27,14 +27,11 @@ import Common.Annex import Types.TrustLevel import qualified Annex.Branch import qualified Annex +import Logs import Logs.UUIDBased import Remote.List import qualified Types.Remote -{- Filename of trust.log. -} -trustLog :: FilePath -trustLog = "trust.log" - {- Returns a list of UUIDs that the trustLog indicates have the - specified trust level. - Note that the list can be incomplete for SemiTrusted, since that's diff --git a/Logs/UUID.hs b/Logs/UUID.hs index 2f24a388e4..ef1074e78b 100644 --- a/Logs/UUID.hs +++ b/Logs/UUID.hs @@ -28,13 +28,10 @@ import Types.UUID import Common.Annex import qualified Annex import qualified Annex.Branch +import Logs import Logs.UUIDBased import qualified Annex.UUID -{- Filename of uuid.log. -} -uuidLog :: FilePath -uuidLog = "uuid.log" - {- Records a description for a uuid in the log. -} describeUUID :: UUID -> String -> Annex () describeUUID uuid desc = do diff --git a/Logs/Web.hs b/Logs/Web.hs index 47ab61943d..0239f23350 100644 --- a/Logs/Web.hs +++ b/Logs/Web.hs @@ -11,8 +11,6 @@ module Logs.Web ( getUrls, setUrlPresent, setUrlMissing, - urlLog, - urlLogKey, knownUrls, Downloader(..), getDownloader, @@ -22,9 +20,9 @@ module Logs.Web ( import qualified Data.ByteString.Lazy.Char8 as L import Common.Annex +import Logs import Logs.Presence import Logs.Location -import Types.Key import qualified Annex.Branch import Annex.CatFile import qualified Git @@ -36,35 +34,9 @@ type URLString = String webUUID :: UUID webUUID = UUID "00000000-0000-0000-0000-000000000001" -urlLogExt :: String -urlLogExt = ".log.web" - -urlLog :: Key -> FilePath -urlLog key = hashDirLower key keyFile key ++ urlLogExt - -{- Converts a url log file into a key. - - (Does not work on oldurlLogs.) -} -urlLogKey :: FilePath -> Maybe Key -urlLogKey file - | ext == urlLogExt = fileKey base - | otherwise = Nothing - where - (base, ext) = splitAt (length file - extlen) file - extlen = length urlLogExt - -isUrlLog :: FilePath -> Bool -isUrlLog file = urlLogExt `isSuffixOf` file - -{- Used to store the urls elsewhere. -} -oldurlLogs :: Key -> [FilePath] -oldurlLogs key = - [ "remote/web" hashDirLower key key2file key ++ ".log" - , "remote/web" hashDirLower key keyFile key ++ ".log" - ] - {- Gets all urls that a key might be available from. -} getUrls :: Key -> Annex [URLString] -getUrls key = go $ urlLog key : oldurlLogs key +getUrls key = go $ urlLogFile key : oldurlLogs key where go [] = return [] go (l:ls) = do @@ -77,13 +49,13 @@ setUrlPresent :: Key -> URLString -> Annex () setUrlPresent key url = do us <- getUrls key unless (url `elem` us) $ do - addLog (urlLog key) =<< logNow InfoPresent url + addLog (urlLogFile key) =<< logNow InfoPresent url -- update location log to indicate that the web has the key logChange key webUUID InfoPresent setUrlMissing :: Key -> URLString -> Annex () setUrlMissing key url = do - addLog (urlLog key) =<< logNow InfoMissing url + addLog (urlLogFile key) =<< logNow InfoMissing url whenM (null <$> getUrls key) $ logChange key webUUID InfoMissing diff --git a/Test.hs b/Test.hs index 3eb330c226..ec70c4ecbd 100644 --- a/Test.hs +++ b/Test.hs @@ -33,6 +33,7 @@ import qualified Types.KeySource import qualified Types.Backend import qualified Types.TrustLevel import qualified Types +import qualified Logs import qualified Logs.UUIDBased import qualified Logs.Trust import qualified Logs.Remote @@ -115,6 +116,7 @@ quickcheck = , check "prop_idempotent_key_encode" Types.Key.prop_idempotent_key_encode , check "prop_idempotent_shellEscape" Utility.SafeCommand.prop_idempotent_shellEscape , check "prop_idempotent_shellEscape_multiword" Utility.SafeCommand.prop_idempotent_shellEscape_multiword + , check "prop_logs_sane" Logs.prop_logs_sane , check "prop_idempotent_configEscape" Logs.Remote.prop_idempotent_configEscape , check "prop_parse_show_Config" Logs.Remote.prop_parse_show_Config , check "prop_parentDir_basics" Utility.Path.prop_parentDir_basics diff --git a/Upgrade/V2.hs b/Upgrade/V2.hs index b5de6c8c04..42419b8abd 100644 --- a/Upgrade/V2.hs +++ b/Upgrade/V2.hs @@ -12,9 +12,9 @@ import qualified Git import qualified Git.Command import qualified Git.Ref import qualified Annex.Branch -import Logs.Location import Annex.Content import Utility.Tmp +import Logs olddir :: Git.Repo -> FilePath olddir g @@ -47,7 +47,7 @@ upgrade = do e <- liftIO $ doesDirectoryExist old when e $ do - mapM_ (\(k, f) -> inject f $ logFile k) =<< locationLogs + mapM_ (\(k, f) -> inject f $ locationLogFile k) =<< locationLogs mapM_ (\f -> inject f f) =<< logFiles old saveState False @@ -73,7 +73,7 @@ locationLogs = do where tryDirContents d = catchDefaultIO [] $ dirContents d islogfile f = maybe Nothing (\k -> Just (k, f)) $ - logFileKey $ takeFileName f + locationLogFileKey f inject :: FilePath -> FilePath -> Annex () inject source dest = do diff --git a/Utility/Misc.hs b/Utility/Misc.hs index 804a9e4872..48ce4c9294 100644 --- a/Utility/Misc.hs +++ b/Utility/Misc.hs @@ -91,6 +91,12 @@ massReplace vs = go [] vs go (replacement:acc) vs (drop (length val) s) | otherwise = go acc rest s +{- First item in the list that is not Nothing. -} +firstJust :: Eq a => [Maybe a] -> Maybe a +firstJust ms = case dropWhile (== Nothing) ms of + [] -> Nothing + (md:_) -> md + {- Given two orderings, returns the second if the first is EQ and returns - the first otherwise. - From 2f57d74534333d9fc51126499a0f31b1248d1deb Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 29 Aug 2013 20:28:45 -0400 Subject: [PATCH 8/9] remove print --- Annex/Branch.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 13e1f11eb8..334b60634f 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -402,7 +402,6 @@ handleTransitions :: Transitions -> [Git.Ref] -> Annex (Maybe (Git.Branch, [Git. handleTransitions localts refs = do m <- M.fromList <$> mapM getreftransition refs let remotets = M.elems m - liftIO $ print ("transitions", localts, remotets) if all (localts ==) remotets then return Nothing else do From 0831e18372861af8c83852a13e24ef91ba251335 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 31 Aug 2013 17:38:33 -0400 Subject: [PATCH 9/9] forget --drop-dead: Completely removes mentions of repositories that have been marked as dead from the git-annex branch. Wrote nice pure transition calculator, and ugly code to stage its results into the git-annex branch. Also had to split up several Log modules that Annex.Branch needed to use, but that themselves used Annex.Branch. The transition calculator is limited to looking at and changing one file at a time. While this made the implementation relatively easy, it precludes transitions that do stuff like deleting old url log files for keys that are being removed because they are no longer present anywhere. --- Annex/Branch.hs | 91 +++++++++++++++++++++++++++++-------- Annex/Branch/Transitions.hs | 53 +++++++++++++++++++++ Command/Forget.hs | 31 +++++++++---- Logs/Presence.hs | 83 ++------------------------------- Logs/Presence/Pure.hs | 84 ++++++++++++++++++++++++++++++++++ Logs/Transitions.hs | 13 ------ Logs/Trust.hs | 28 ++---------- Logs/Trust/Pure.hs | 36 +++++++++++++++ debian/changelog | 2 + doc/git-annex.mdwn | 13 +++--- 10 files changed, 279 insertions(+), 155 deletions(-) create mode 100644 Annex/Branch/Transitions.hs create mode 100644 Logs/Presence/Pure.hs create mode 100644 Logs/Trust/Pure.hs diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 334b60634f..9ee281de9b 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -46,8 +46,12 @@ import Annex.CatFile import Annex.Perms import qualified Annex import Utility.Env +import Logs import Logs.Transitions +import Logs.Trust.Pure import Annex.ReplaceFile +import qualified Annex.Queue +import Annex.Branch.Transitions {- Name of the branch that is used to store git-annex's information. -} name :: Git.Ref @@ -194,7 +198,10 @@ get' :: FilePath -> Annex String get' file = go =<< getJournalFile file where go (Just journalcontent) = return journalcontent - go Nothing = withIndex $ L.unpack <$> catFile fullname file + go Nothing = getRaw file + +getRaw :: FilePath -> Annex String +getRaw file = withIndex $ L.unpack <$> catFile fullname file {- Applies a function to modifiy the content of a file. - @@ -272,13 +279,17 @@ commitBranch' branchref message parents = do files :: Annex [FilePath] files = do update - withIndex $ do - bfiles <- inRepo $ Git.Command.pipeNullSplitZombie - [ Params "ls-tree --name-only -r -z" - , Param $ show fullname - ] - jfiles <- getJournalledFiles - return $ jfiles ++ bfiles + (++) + <$> branchFiles + <*> getJournalledFiles + +{- Files in the branch, not including any from journalled changes, + - and without updating the branch. -} +branchFiles :: Annex [FilePath] +branchFiles = withIndex $ inRepo $ Git.Command.pipeNullSplitZombie + [ Params "ls-tree --name-only -r -z" + , Param $ show fullname + ] {- Populates the branch's index file with the current branch contents. - @@ -436,20 +447,60 @@ getIgnoredRefs = S.fromList . mapMaybe Git.Sha.extractSha . lines <$> content - commits it to the branch, or creates a new branch, and returns - the branch's ref. -} performTransitions :: Transitions -> Bool -> Annex Git.Ref -performTransitions ts neednewbranch = withIndex $ do - when (inTransitions ForgetDeadRemotes ts) $ - error "TODO ForgetDeadRemotes transition" - if neednewbranch - then do - committedref <- inRepo $ Git.Branch.commit message fullname [] - setIndexSha committedref - return committedref - else do - ref <- getBranch - commitBranch ref message [fullname] - getBranch +performTransitions ts neednewbranch = do + -- For simplicity & speed, we're going to use the Annex.Queue to + -- update the git-annex branch, while it usually holds changes + -- for the head branch. Flush any such changes. + Annex.Queue.flush + withIndex $ do + run $ mapMaybe getTransitionCalculator $ transitionList ts + Annex.Queue.flush + if neednewbranch + then do + committedref <- inRepo $ Git.Branch.commit message fullname [] + setIndexSha committedref + return committedref + else do + ref <- getBranch + commitBranch ref message [fullname] + getBranch where message | neednewbranch = "new branch for transition " ++ tdesc | otherwise = "continuing transition " ++ tdesc tdesc = show $ map describeTransition $ transitionList ts + + {- The changes to make to the branch are calculated and applied to + - the branch directly, rather than going through the journal, + - which would be innefficient. (And the journal is not designed + - to hold changes to every file in the branch at once.) + - + - When a file in the branch is changed by transition code, + - that value is remembered and fed into the code for subsequent + - transitions. + -} + run [] = noop + run changers = do + trustmap <- calcTrustMap <$> getRaw trustLog + fs <- branchFiles + hasher <- inRepo hashObjectStart + forM_ fs $ \f -> do + content <- getRaw f + apply changers hasher f content trustmap + liftIO $ hashObjectStop hasher + apply [] _ _ _ _ = return () + apply (changer:rest) hasher file content trustmap = + case changer file content trustmap of + RemoveFile -> do + Annex.Queue.addUpdateIndex + =<< inRepo (Git.UpdateIndex.unstageFile file) + -- File is deleted; can't run any other + -- transitions on it. + return () + ChangeFile content' -> do + sha <- inRepo $ hashObject BlobObject content' + Annex.Queue.addUpdateIndex $ Git.UpdateIndex.pureStreamer $ + Git.UpdateIndex.updateIndexLine sha FileBlob (asTopFilePath file) + apply rest hasher file content' trustmap + PreserveFile -> + apply rest hasher file content trustmap diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs new file mode 100644 index 0000000000..90002de624 --- /dev/null +++ b/Annex/Branch/Transitions.hs @@ -0,0 +1,53 @@ +{- git-annex branch transitions + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Branch.Transitions ( + FileTransition(..), + getTransitionCalculator +) where + +import Logs +import Logs.Transitions +import Logs.UUIDBased as UUIDBased +import Logs.Presence.Pure as Presence +import Types.TrustLevel +import Types.UUID + +import qualified Data.Map as M + +data FileTransition + = ChangeFile String + | RemoveFile + | PreserveFile + +type TransitionCalculator = FilePath -> String -> TrustMap -> FileTransition + +getTransitionCalculator :: Transition -> Maybe TransitionCalculator +getTransitionCalculator ForgetGitHistory = Nothing +getTransitionCalculator ForgetDeadRemotes = Just dropDead + +dropDead :: FilePath -> String -> TrustMap -> FileTransition +dropDead f content trustmap = case getLogVariety f of + Just UUIDBasedLog -> ChangeFile $ + UUIDBased.showLog id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLog Just content + Just (PresenceLog _) -> + let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content + in if null newlog + then RemoveFile + else ChangeFile $ Presence.showLog newlog + Nothing -> PreserveFile + +dropDeadFromUUIDBasedLog :: TrustMap -> UUIDBased.Log String -> UUIDBased.Log String +dropDeadFromUUIDBasedLog trustmap = M.filterWithKey $ notDead trustmap . const + +{- Presence logs can contain UUIDs or other values. Any line that matches + - a dead uuid is dropped; any other values are passed through. -} +dropDeadFromPresenceLog :: TrustMap -> [Presence.LogLine] -> [Presence.LogLine] +dropDeadFromPresenceLog trustmap = filter $ notDead trustmap (toUUID . Presence.info) + +notDead :: TrustMap -> (v -> UUID) -> v -> Bool +notDead trustmap a v = M.findWithDefault SemiTrusted (a v) trustmap /= DeadTrusted diff --git a/Command/Forget.hs b/Command/Forget.hs index e405a99181..d216ae3ca4 100644 --- a/Command/Forget.hs +++ b/Command/Forget.hs @@ -12,30 +12,41 @@ import Command import qualified Annex.Branch as Branch import Logs.Transitions import qualified Annex +import qualified Option import Data.Time.Clock.POSIX def :: [Command] -def = [command "forget" paramNothing seek +def = [withOptions forgetOptions $ command "forget" paramNothing seek SectionMaintenance "prune git-annex branch history"] +forgetOptions :: [Option] +forgetOptions = [dropDeadOption] + +dropDeadOption :: Option +dropDeadOption = Option.flag [] "drop-dead" "drop references to dead repositories" + seek :: [CommandSeek] -seek = [withNothing start] +seek = [withFlag dropDeadOption $ \dropdead -> + withNothing $ start dropdead] -start :: CommandStart -start = do +start :: Bool -> CommandStart +start dropdead = do showStart "forget" "git-annex" - next $ perform =<< Annex.getState Annex.force - -perform :: Bool -> CommandPerform -perform True = do now <- liftIO getPOSIXTime - let ts = addTransition now ForgetGitHistory noTransitions + let basets = addTransition now ForgetGitHistory noTransitions + let ts = if dropdead + then addTransition now ForgetDeadRemotes basets + else basets + next $ perform ts =<< Annex.getState Annex.force + +perform :: Transitions -> Bool -> CommandPerform +perform ts True = do recordTransitions Branch.change ts -- get branch committed before contining with the transition Branch.update void $ Branch.performTransitions ts True next $ return True -perform False = do +perform _ False = do showLongNote "To forget git-annex branch history, you must specify --force. This deletes metadata!" stop diff --git a/Logs/Presence.hs b/Logs/Presence.hs index ec5cec209a..516d59618f 100644 --- a/Logs/Presence.hs +++ b/Logs/Presence.hs @@ -12,36 +12,18 @@ -} module Logs.Presence ( - LogStatus(..), - LogLine(LogLine), + module X, addLog, readLog, - getLog, - parseLog, - showLog, logNow, - compactLog, - currentLog, - prop_parse_show_log, + currentLog ) where import Data.Time.Clock.POSIX -import Data.Time -import System.Locale -import qualified Data.Map as M +import Logs.Presence.Pure as X import Common.Annex import qualified Annex.Branch -import Utility.QuickCheck - -data LogLine = LogLine { - date :: POSIXTime, - status :: LogStatus, - info :: String -} deriving (Eq, Show) - -data LogStatus = InfoPresent | InfoMissing - deriving (Eq, Show, Bounded, Enum) addLog :: FilePath -> LogLine -> Annex () addLog file line = Annex.Branch.change file $ \s -> @@ -52,29 +34,6 @@ addLog file line = Annex.Branch.change file $ \s -> readLog :: FilePath -> Annex [LogLine] readLog = parseLog <$$> Annex.Branch.get -{- Parses a log file. Unparseable lines are ignored. -} -parseLog :: String -> [LogLine] -parseLog = mapMaybe parseline . lines - where - parseline l = LogLine - <$> (utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" d) - <*> parsestatus s - <*> pure rest - where - (d, pastd) = separate (== ' ') l - (s, rest) = separate (== ' ') pastd - parsestatus "1" = Just InfoPresent - parsestatus "0" = Just InfoMissing - parsestatus _ = Nothing - -{- Generates a log file. -} -showLog :: [LogLine] -> String -showLog = unlines . map genline - where - genline (LogLine d s i) = unwords [show d, genstatus s, i] - genstatus InfoPresent = "1" - genstatus InfoMissing = "0" - {- Generates a new LogLine with the current date. -} logNow :: LogStatus -> String -> Annex LogLine logNow s i = do @@ -84,39 +43,3 @@ logNow s i = do {- Reads a log and returns only the info that is still in effect. -} currentLog :: FilePath -> Annex [String] currentLog file = map info . filterPresent <$> readLog file - -{- Given a log, returns only the info that is are still in effect. -} -getLog :: String -> [String] -getLog = map info . filterPresent . parseLog - -{- Returns the info from LogLines that are in effect. -} -filterPresent :: [LogLine] -> [LogLine] -filterPresent = filter (\l -> InfoPresent == status l) . compactLog - -{- Compacts a set of logs, returning a subset that contains the current - - status. -} -compactLog :: [LogLine] -> [LogLine] -compactLog = M.elems . foldr mapLog M.empty - -type LogMap = M.Map String LogLine - -{- Inserts a log into a map of logs, if the log has better (ie, newer) - - information than the other logs in the map -} -mapLog :: LogLine -> LogMap -> LogMap -mapLog l m - | better = M.insert i l m - | otherwise = m - where - better = maybe True newer $ M.lookup i m - newer l' = date l' <= date l - i = info l - -instance Arbitrary LogLine where - arbitrary = LogLine - <$> arbitrary - <*> elements [minBound..maxBound] - <*> arbitrary `suchThat` ('\n' `notElem`) - -prop_parse_show_log :: [LogLine] -> Bool -prop_parse_show_log l = parseLog (showLog l) == l - diff --git a/Logs/Presence/Pure.hs b/Logs/Presence/Pure.hs new file mode 100644 index 0000000000..ffeb78b26d --- /dev/null +++ b/Logs/Presence/Pure.hs @@ -0,0 +1,84 @@ +{- git-annex presence log, pure operations + - + - Copyright 2010-2013 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.Presence.Pure where + +import Data.Time.Clock.POSIX +import Data.Time +import System.Locale +import qualified Data.Map as M + +import Common.Annex +import Utility.QuickCheck + +data LogLine = LogLine { + date :: POSIXTime, + status :: LogStatus, + info :: String +} deriving (Eq, Show) + +data LogStatus = InfoPresent | InfoMissing + deriving (Eq, Show, Bounded, Enum) + +{- Parses a log file. Unparseable lines are ignored. -} +parseLog :: String -> [LogLine] +parseLog = mapMaybe parseline . lines + where + parseline l = LogLine + <$> (utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" d) + <*> parsestatus s + <*> pure rest + where + (d, pastd) = separate (== ' ') l + (s, rest) = separate (== ' ') pastd + parsestatus "1" = Just InfoPresent + parsestatus "0" = Just InfoMissing + parsestatus _ = Nothing + +{- Generates a log file. -} +showLog :: [LogLine] -> String +showLog = unlines . map genline + where + genline (LogLine d s i) = unwords [show d, genstatus s, i] + genstatus InfoPresent = "1" + genstatus InfoMissing = "0" + +{- Given a log, returns only the info that is are still in effect. -} +getLog :: String -> [String] +getLog = map info . filterPresent . parseLog + +{- Returns the info from LogLines that are in effect. -} +filterPresent :: [LogLine] -> [LogLine] +filterPresent = filter (\l -> InfoPresent == status l) . compactLog + +{- Compacts a set of logs, returning a subset that contains the current + - status. -} +compactLog :: [LogLine] -> [LogLine] +compactLog = M.elems . foldr mapLog M.empty + +type LogMap = M.Map String LogLine + +{- Inserts a log into a map of logs, if the log has better (ie, newer) + - information than the other logs in the map -} +mapLog :: LogLine -> LogMap -> LogMap +mapLog l m + | better = M.insert i l m + | otherwise = m + where + better = maybe True newer $ M.lookup i m + newer l' = date l' <= date l + i = info l + +instance Arbitrary LogLine where + arbitrary = LogLine + <$> arbitrary + <*> elements [minBound..maxBound] + <*> arbitrary `suchThat` ('\n' `notElem`) + +prop_parse_show_log :: [LogLine] -> Bool +prop_parse_show_log l = parseLog (showLog l) == l + diff --git a/Logs/Transitions.hs b/Logs/Transitions.hs index d4b7d5eb35..783ce5090a 100644 --- a/Logs/Transitions.hs +++ b/Logs/Transitions.hs @@ -7,16 +7,6 @@ - done that is listed in the remote branch by checking that the local - branch contains the same transition, with the same or newer start time. - - - When a remote branch that has had an transition performed on it - - becomes available for merging into the local git-annex branch, - - the transition is first performed on the local branch. - - - - When merging a remote branch into the local git-annex branch, - - all transitions that have been performed on the local branch must also - - have been performed on the remote branch already. (Or it would be - - possible to perform the transitions on a fixup branch and merge it, - - but that would be expensive.) - - - Copyright 2013 Joey Hess - - Licensed under the GNU GPL version 3 or higher. @@ -86,9 +76,6 @@ parseTransitionLine s = TransitionLine <$> pdate ds <*> readish ts combineTransitions :: [Transitions] -> Transitions combineTransitions = S.unions -inTransitions :: Transition -> Transitions -> Bool -inTransitions t = not . S.null . S.filter (\l -> transition l == t) - transitionList :: Transitions -> [Transition] transitionList = map transition . S.elems diff --git a/Logs/Trust.hs b/Logs/Trust.hs index 6c6b33f70e..c6f0ad3abd 100644 --- a/Logs/Trust.hs +++ b/Logs/Trust.hs @@ -6,6 +6,7 @@ -} module Logs.Trust ( + module X, trustLog, TrustLevel(..), trustGet, @@ -16,8 +17,6 @@ module Logs.Trust ( lookupTrust, trustMapLoad, trustMapRaw, - - prop_parse_show_TrustLog, ) where import qualified Data.Map as M @@ -31,6 +30,7 @@ import Logs import Logs.UUIDBased import Remote.List import qualified Types.Remote +import Logs.Trust.Pure as X {- Returns a list of UUIDs that the trustLog indicates have the - specified trust level. @@ -94,26 +94,4 @@ trustMapLoad = do {- Does not include forcetrust or git config values, just those from the - log file. -} trustMapRaw :: Annex TrustMap -trustMapRaw = simpleMap . parseLog (Just . parseTrustLog) - <$> Annex.Branch.get trustLog - -{- The trust.log used to only list trusted repos, without a field for the - - trust status, which is why this defaults to Trusted. -} -parseTrustLog :: String -> TrustLevel -parseTrustLog s = maybe Trusted parse $ headMaybe $ words s - where - parse "1" = Trusted - parse "0" = UnTrusted - parse "X" = DeadTrusted - parse _ = SemiTrusted - -showTrustLog :: TrustLevel -> String -showTrustLog Trusted = "1" -showTrustLog UnTrusted = "0" -showTrustLog DeadTrusted = "X" -showTrustLog SemiTrusted = "?" - -prop_parse_show_TrustLog :: Bool -prop_parse_show_TrustLog = all check [minBound .. maxBound] - where - check l = parseTrustLog (showTrustLog l) == l +trustMapRaw = calcTrustMap <$> Annex.Branch.get trustLog diff --git a/Logs/Trust/Pure.hs b/Logs/Trust/Pure.hs new file mode 100644 index 0000000000..11cfbe0563 --- /dev/null +++ b/Logs/Trust/Pure.hs @@ -0,0 +1,36 @@ +{- git-annex trust log, pure operations + - + - Copyright 2010-2013 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.Trust.Pure where + +import Common.Annex +import Types.TrustLevel +import Logs.UUIDBased + +calcTrustMap :: String -> TrustMap +calcTrustMap = simpleMap . parseLog (Just . parseTrustLog) + +{- The trust.log used to only list trusted repos, without a field for the + - trust status, which is why this defaults to Trusted. -} +parseTrustLog :: String -> TrustLevel +parseTrustLog s = maybe Trusted parse $ headMaybe $ words s + where + parse "1" = Trusted + parse "0" = UnTrusted + parse "X" = DeadTrusted + parse _ = SemiTrusted + +showTrustLog :: TrustLevel -> String +showTrustLog Trusted = "1" +showTrustLog UnTrusted = "0" +showTrustLog DeadTrusted = "X" +showTrustLog SemiTrusted = "?" + +prop_parse_show_TrustLog :: Bool +prop_parse_show_TrustLog = all check [minBound .. maxBound] + where + check l = parseTrustLog (showTrustLog l) == l diff --git a/debian/changelog b/debian/changelog index cb6fbee5a9..a28c9f1877 100644 --- a/debian/changelog +++ b/debian/changelog @@ -3,6 +3,8 @@ git-annex (4.20130828) UNRELEASED; urgency=low * forget: New command, causes git-annex branch history to be forgotten in a way that will spread to other clones of the repository. (As long as they're running this version or newer of git-annex.) + * forget --drop-dead: Completely removes mentions of repositories that + have been marked as dead from the git-annex branch. * sync, assistant: Force push of the git-annex branch. Necessary to ensure it gets pushed to remotes after being rewritten by forget. diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 5fb0ce5a4d..269588addf 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -482,17 +482,16 @@ subdirectories). * forget Causes the git-annex branch to be rewritten, throwing away historical - data about past locations of files, files that are no longer present on - any remote, etc. The resulting branch will use less space, but for - example `git annex log` will not be able to show where files used to - be located. + data about past locations of files. The resulting branch will use less + space, but `git annex log` will not be able to show where + files used to be located. - To also prune references to remotes that have been marked as dead, - specify --dead. + To also prune references to repositories that have been marked as dead, + specify --drop-dead. When this rewritten branch is merged into other clones of the repository, git-annex will automatically perform the same rewriting - to their local git-annex branch. So the forgetfulness will automatically + to their local git-annex branches. So the forgetfulness will automatically propigate out from its starting point until all repositories running git-annex have forgotten their old history. (You may need to force git to push the branch to any git repositories not running git-annex.