Merge branch 'forget'

Conflicts:
	debian/changelog
This commit is contained in:
Joey Hess 2013-09-03 14:36:00 -04:00
commit db83cc82d6
26 changed files with 685 additions and 214 deletions

View file

@ -1,6 +1,6 @@
{- management of the git-annex branch
-
- Copyright 2011-2012 Joey Hess <joey@kitenet.net>
- Copyright 2011-2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -22,9 +22,12 @@ module Annex.Branch (
commit,
files,
withIndex,
performTransitions,
) where
import qualified Data.ByteString.Lazy.Char8 as L
import qualified Data.Set as S
import qualified Data.Map as M
import Common.Annex
import Annex.BranchState
@ -32,6 +35,7 @@ import Annex.Journal
import qualified Git
import qualified Git.Command
import qualified Git.Ref
import qualified Git.Sha
import qualified Git.Branch
import qualified Git.UnionMerge
import qualified Git.UpdateIndex
@ -42,6 +46,12 @@ import Annex.CatFile
import Annex.Perms
import qualified Annex
import Utility.Env
import Logs
import Logs.Transitions
import Logs.Trust.Pure
import Annex.ReplaceFile
import qualified Annex.Queue
import Annex.Branch.Transitions
{- Name of the branch that is used to store git-annex's information. -}
name :: Git.Ref
@ -110,6 +120,9 @@ forceUpdate = updateTo =<< siblingBranches
- later get staged, and might overwrite changes made during the merge.
- This is only done if some of the Refs do need to be merged.
-
- Also handles performing any Transitions that have not yet been
- performed, in either the local branch, or the Refs.
-
- Returns True if any refs were merged in, False otherwise.
-}
updateTo :: [(Git.Ref, Git.Branch)] -> Annex Bool
@ -117,7 +130,8 @@ updateTo pairs = do
-- ensure branch exists, and get its current ref
branchref <- getBranch
dirty <- journalDirty
(refs, branches) <- unzip <$> filterM isnewer pairs
ignoredrefs <- getIgnoredRefs
(refs, branches) <- unzip <$> filterM (isnewer ignoredrefs) pairs
if null refs
{- Even when no refs need to be merged, the index
- may still be updated if the branch has gotten ahead
@ -132,7 +146,9 @@ updateTo pairs = do
else lockJournal $ go branchref dirty refs branches
return $ not $ null refs
where
isnewer (r, _) = inRepo $ Git.Branch.changed fullname r
isnewer ignoredrefs (r, _)
| S.member r ignoredrefs = return False
| otherwise = inRepo $ Git.Branch.changed fullname r
go branchref dirty refs branches = withIndex $ do
cleanjournal <- if dirty then stageJournal else return noop
let merge_desc = if null branches
@ -140,23 +156,30 @@ updateTo pairs = do
else "merging " ++
unwords (map Git.Ref.describe branches) ++
" into " ++ show name
localtransitions <- parseTransitionsStrictly "local"
<$> getStale transitionsLog
unless (null branches) $ do
showSideAction merge_desc
mergeIndex refs
ff <- if dirty
then return False
else inRepo $ Git.Branch.fastForward fullname refs
if ff
then updateIndex branchref
else commitBranch branchref merge_desc
(nub $ fullname:refs)
let commitrefs = nub $ fullname:refs
transitioned <- handleTransitions localtransitions commitrefs
case transitioned of
Nothing -> do
ff <- if dirty
then return False
else inRepo $ Git.Branch.fastForward fullname refs
if ff
then updateIndex branchref
else commitBranch branchref merge_desc commitrefs
Just (branchref', commitrefs') ->
commitBranch branchref' merge_desc commitrefs'
liftIO cleanjournal
{- Gets the content of a file, which may be in the journal, or in the index
- (and committed to the branch).
-
- Updates the branch if necessary, to ensure the most up-to-date available
- content is available.
- content is returned.
-
- Returns an empty string if the file doesn't exist yet. -}
get :: FilePath -> Annex String
@ -175,7 +198,10 @@ get' :: FilePath -> Annex String
get' file = go =<< getJournalFile file
where
go (Just journalcontent) = return journalcontent
go Nothing = withIndex $ L.unpack <$> catFile fullname file
go Nothing = getRaw file
getRaw :: FilePath -> Annex String
getRaw file = withIndex $ L.unpack <$> catFile fullname file
{- Applies a function to modifiy the content of a file.
-
@ -253,13 +279,17 @@ commitBranch' branchref message parents = do
files :: Annex [FilePath]
files = do
update
withIndex $ do
bfiles <- inRepo $ Git.Command.pipeNullSplitZombie
[ Params "ls-tree --name-only -r -z"
, Param $ show fullname
]
jfiles <- getJournalledFiles
return $ jfiles ++ bfiles
(++)
<$> branchFiles
<*> getJournalledFiles
{- Files in the branch, not including any from journalled changes,
- and without updating the branch. -}
branchFiles :: Annex [FilePath]
branchFiles = withIndex $ inRepo $ Git.Command.pipeNullSplitZombie
[ Params "ls-tree --name-only -r -z"
, Param $ show fullname
]
{- Populates the branch's index file with the current branch contents.
-
@ -361,3 +391,116 @@ stageJournal = withIndex $ do
sha <- hashFile h path
streamer $ Git.UpdateIndex.updateIndexLine
sha FileBlob (asTopFilePath $ fileJournal file)
{- This is run after the refs have been merged into the index,
- but before the result is committed to the branch.
- Which is why it's passed the contents of the local branches's
- transition log before that merge took place.
-
- When the refs contain transitions that have not yet been done locally,
- the transitions are performed on the index, and a new branch
- is created from the result, and returned.
-
- When there are transitions recorded locally that have not been done
- to the remote refs, the transitions are performed in the index,
- and the existing branch is returned. In this case, the untransitioned
- remote refs cannot be merged into the branch (since transitions
- throw away history), so none of them are included in the returned
- list of refs, and they are added to the list of refs to ignore,
- to avoid re-merging content from them again.
-}
handleTransitions :: Transitions -> [Git.Ref] -> Annex (Maybe (Git.Branch, [Git.Ref]))
handleTransitions localts refs = do
m <- M.fromList <$> mapM getreftransition refs
let remotets = M.elems m
if all (localts ==) remotets
then return Nothing
else do
let allts = combineTransitions (localts:remotets)
let (transitionedrefs, untransitionedrefs) =
partition (\r -> M.lookup r m == Just allts) refs
transitionedbranch <- performTransitions allts (localts /= allts)
ignoreRefs untransitionedrefs
return $ Just (transitionedbranch, transitionedrefs)
where
getreftransition ref = do
ts <- parseTransitionsStrictly "remote" . L.unpack
<$> catFile ref transitionsLog
return (ref, ts)
ignoreRefs :: [Git.Ref] -> Annex ()
ignoreRefs rs = do
old <- getIgnoredRefs
let s = S.unions [old, S.fromList rs]
f <- fromRepo gitAnnexIgnoredRefs
replaceFile f $ \tmp -> liftIO $ writeFile tmp $
unlines $ map show $ S.elems s
getIgnoredRefs :: Annex (S.Set Git.Ref)
getIgnoredRefs = S.fromList . mapMaybe Git.Sha.extractSha . lines <$> content
where
content = do
f <- fromRepo gitAnnexIgnoredRefs
liftIO $ catchDefaultIO "" $ readFile f
{- Performs the specified transitions on the contents of the index file,
- commits it to the branch, or creates a new branch, and returns
- the branch's ref. -}
performTransitions :: Transitions -> Bool -> Annex Git.Ref
performTransitions ts neednewbranch = do
-- For simplicity & speed, we're going to use the Annex.Queue to
-- update the git-annex branch, while it usually holds changes
-- for the head branch. Flush any such changes.
Annex.Queue.flush
withIndex $ do
run $ mapMaybe getTransitionCalculator $ transitionList ts
Annex.Queue.flush
if neednewbranch
then do
committedref <- inRepo $ Git.Branch.commit message fullname []
setIndexSha committedref
return committedref
else do
ref <- getBranch
commitBranch ref message [fullname]
getBranch
where
message
| neednewbranch = "new branch for transition " ++ tdesc
| otherwise = "continuing transition " ++ tdesc
tdesc = show $ map describeTransition $ transitionList ts
{- The changes to make to the branch are calculated and applied to
- the branch directly, rather than going through the journal,
- which would be innefficient. (And the journal is not designed
- to hold changes to every file in the branch at once.)
-
- When a file in the branch is changed by transition code,
- that value is remembered and fed into the code for subsequent
- transitions.
-}
run [] = noop
run changers = do
trustmap <- calcTrustMap <$> getRaw trustLog
fs <- branchFiles
hasher <- inRepo hashObjectStart
forM_ fs $ \f -> do
content <- getRaw f
apply changers hasher f content trustmap
liftIO $ hashObjectStop hasher
apply [] _ _ _ _ = return ()
apply (changer:rest) hasher file content trustmap =
case changer file content trustmap of
RemoveFile -> do
Annex.Queue.addUpdateIndex
=<< inRepo (Git.UpdateIndex.unstageFile file)
-- File is deleted; can't run any other
-- transitions on it.
return ()
ChangeFile content' -> do
sha <- inRepo $ hashObject BlobObject content'
Annex.Queue.addUpdateIndex $ Git.UpdateIndex.pureStreamer $
Git.UpdateIndex.updateIndexLine sha FileBlob (asTopFilePath file)
apply rest hasher file content' trustmap
PreserveFile ->
apply rest hasher file content trustmap

View file

@ -0,0 +1,53 @@
{- git-annex branch transitions
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.Branch.Transitions (
FileTransition(..),
getTransitionCalculator
) where
import Logs
import Logs.Transitions
import Logs.UUIDBased as UUIDBased
import Logs.Presence.Pure as Presence
import Types.TrustLevel
import Types.UUID
import qualified Data.Map as M
data FileTransition
= ChangeFile String
| RemoveFile
| PreserveFile
type TransitionCalculator = FilePath -> String -> TrustMap -> FileTransition
getTransitionCalculator :: Transition -> Maybe TransitionCalculator
getTransitionCalculator ForgetGitHistory = Nothing
getTransitionCalculator ForgetDeadRemotes = Just dropDead
dropDead :: FilePath -> String -> TrustMap -> FileTransition
dropDead f content trustmap = case getLogVariety f of
Just UUIDBasedLog -> ChangeFile $
UUIDBased.showLog id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLog Just content
Just (PresenceLog _) ->
let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content
in if null newlog
then RemoveFile
else ChangeFile $ Presence.showLog newlog
Nothing -> PreserveFile
dropDeadFromUUIDBasedLog :: TrustMap -> UUIDBased.Log String -> UUIDBased.Log String
dropDeadFromUUIDBasedLog trustmap = M.filterWithKey $ notDead trustmap . const
{- Presence logs can contain UUIDs or other values. Any line that matches
- a dead uuid is dropped; any other values are passed through. -}
dropDeadFromPresenceLog :: TrustMap -> [Presence.LogLine] -> [Presence.LogLine]
dropDeadFromPresenceLog trustmap = filter $ notDead trustmap (toUUID . Presence.info)
notDead :: TrustMap -> (v -> UUID) -> v -> Bool
notDead trustmap a v = M.findWithDefault SemiTrusted (a v) trustmap /= DeadTrusted

View file

@ -13,6 +13,7 @@ import qualified Annex.Branch
import qualified Git
import qualified Git.Ref
import qualified Git.Command
import qualified Git.Branch
import Utility.Base64
{- Converts a git branch into a branch that is tagged with a UUID, typically
@ -50,7 +51,10 @@ taggedPush :: UUID -> Maybe String -> Git.Ref -> Remote -> Git.Repo -> IO Bool
taggedPush u info branch remote = Git.Command.runBool
[ Param "push"
, Param $ Remote.name remote
, Param $ refspec Annex.Branch.name
{- Using forcePush here is safe because we "own" the tagged branch
- we're pushing; it has no other writers. Ensures it is pushed
- even if it has been rewritten by a transition. -}
, Param $ Git.Branch.forcePush $ refspec Annex.Branch.name
, Param $ refspec branch
]
where

52
Command/Forget.hs Normal file
View file

@ -0,0 +1,52 @@
{- git-annex command
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.Forget where
import Common.Annex
import Command
import qualified Annex.Branch as Branch
import Logs.Transitions
import qualified Annex
import qualified Option
import Data.Time.Clock.POSIX
def :: [Command]
def = [withOptions forgetOptions $ command "forget" paramNothing seek
SectionMaintenance "prune git-annex branch history"]
forgetOptions :: [Option]
forgetOptions = [dropDeadOption]
dropDeadOption :: Option
dropDeadOption = Option.flag [] "drop-dead" "drop references to dead repositories"
seek :: [CommandSeek]
seek = [withFlag dropDeadOption $ \dropdead ->
withNothing $ start dropdead]
start :: Bool -> CommandStart
start dropdead = do
showStart "forget" "git-annex"
now <- liftIO getPOSIXTime
let basets = addTransition now ForgetGitHistory noTransitions
let ts = if dropdead
then addTransition now ForgetDeadRemotes basets
else basets
next $ perform ts =<< Annex.getState Annex.force
perform :: Transitions -> Bool -> CommandPerform
perform ts True = do
recordTransitions Branch.change ts
-- get branch committed before contining with the transition
Branch.update
void $ Branch.performTransitions ts True
next $ return True
perform _ False = do
showLongNote "To forget git-annex branch history, you must specify --force. This deletes metadata!"
stop

View file

@ -17,7 +17,7 @@ import Data.Char
import Common.Annex
import Command
import qualified Logs.Location
import Logs
import qualified Logs.Presence
import Annex.CatFile
import qualified Annex.Branch
@ -135,7 +135,7 @@ getLog :: Key -> [CommandParam] -> Annex [String]
getLog key os = do
top <- fromRepo Git.repoPath
p <- liftIO $ relPathCwdToFile top
let logfile = p </> Logs.Location.logFile key
let logfile = p </> locationLogFile key
inRepo $ pipeNullSplitZombie $
[ Params "log -z --pretty=format:%ct --raw --abbrev=40"
, Param "--remove-empty"

View file

@ -167,29 +167,45 @@ pushRemote remote branch = go =<< needpush
showOutput
inRepo $ pushBranch remote branch
{- If the remote is a bare git repository, it's best to push the branch
- directly to it. On the other hand, if it's not bare, pushing to the
- checked out branch will fail, and this is why we use the syncBranch.
{- Pushes a regular branch like master to a remote. Also pushes the git-annex
- branch.
-
- If the remote is a bare git repository, it's best to push the regular
- branch directly to it, so that cloning/pulling will get it.
- On the other hand, if it's not bare, pushing to the checked out branch
- will fail, and this is why we push to its syncBranch.
-
- Git offers no way to tell if a remote is bare or not, so both methods
- are tried.
-
- The direct push is likely to spew an ugly error message, so stderr is
- elided. Since progress is output to stderr too, the sync push is done
- first, and actually sends the data. Then the direct push is tried,
- with stderr discarded, to update the branch ref on the remote.
- elided. Since git progress display goes to stderr too, the sync push
- is done first, and actually sends the data. Then the direct push is
- tried, with stderr discarded, to update the branch ref on the remote.
-
- The sync push forces the update of the remote synced/git-annex branch.
- This is necessary if a transition has rewritten the git-annex branch.
- Normally any changes to the git-annex branch get pulled and merged before
- this push, so this forcing is unlikely to overwrite new data pushed
- in from another repository that is also syncing.
-
- But overwriting of data on synced/git-annex can happen, in a race.
- The only difference caused by using a forced push in that case is that
- the last repository to push wins the race, rather than the first to push.
-}
pushBranch :: Remote -> Git.Ref -> Git.Repo -> IO Bool
pushBranch remote branch g = tryIO directpush `after` syncpush
pushBranch remote branch g = tryIO (directpush g) `after` syncpush g
where
syncpush = Git.Command.runBool (pushparams (refspec branch)) g
directpush = Git.Command.runQuiet (pushparams (show $ Git.Ref.base branch)) g
pushparams b =
syncpush = Git.Command.runBool $ pushparams
[ Git.Branch.forcePush $ refspec Annex.Branch.name
, refspec branch
]
directpush = Git.Command.runQuiet $ pushparams
[show $ Git.Ref.base branch]
pushparams branches =
[ Param "push"
, Param $ Remote.name remote
, Param $ refspec Annex.Branch.name
, Param b
]
] ++ map Param branches
refspec b = concat
[ show $ Git.Ref.base b
, ":"

View file

@ -101,3 +101,7 @@ commit message branch parentrefs repo = do
return sha
where
ps = concatMap (\r -> ["-p", show r]) parentrefs
{- A leading + makes git-push force pushing a branch. -}
forcePush :: String -> String
forcePush b = "+" ++ b

View file

@ -67,6 +67,7 @@ import qualified Command.Map
import qualified Command.Direct
import qualified Command.Indirect
import qualified Command.Upgrade
import qualified Command.Forget
import qualified Command.Version
import qualified Command.Help
#ifdef WITH_ASSISTANT
@ -139,6 +140,7 @@ cmds = concat
, Command.Direct.def
, Command.Indirect.def
, Command.Upgrade.def
, Command.Forget.def
, Command.Version.def
, Command.Help.def
#ifdef WITH_ASSISTANT

View file

@ -35,6 +35,7 @@ module Locations (
gitAnnexJournalLock,
gitAnnexIndex,
gitAnnexIndexLock,
gitAnnexIgnoredRefs,
gitAnnexPidFile,
gitAnnexDaemonStatusFile,
gitAnnexLogFile,
@ -225,6 +226,10 @@ gitAnnexIndex r = gitAnnexDir r </> "index"
gitAnnexIndexLock :: Git.Repo -> FilePath
gitAnnexIndexLock r = gitAnnexDir r </> "index.lck"
{- List of refs that should not be merged into the git-annex branch. -}
gitAnnexIgnoredRefs :: Git.Repo -> FilePath
gitAnnexIgnoredRefs r = gitAnnexDir r </> "ignoredrefs"
{- Pid file for daemon mode. -}
gitAnnexPidFile :: Git.Repo -> FilePath
gitAnnexPidFile r = gitAnnexDir r </> "daemon.pid"

110
Logs.hs Normal file
View file

@ -0,0 +1,110 @@
{- git-annex log file names
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs where
import Common.Annex
import Types.Key
data LogVariety = UUIDBasedLog | PresenceLog Key
deriving (Show)
{- Converts a path from the git-annex branch into one of the varieties
- of logs used by git-annex, if it's a known path. -}
getLogVariety :: FilePath -> Maybe LogVariety
getLogVariety f
| f `elem` uuidBasedLogs = Just UUIDBasedLog
| otherwise = PresenceLog <$> firstJust (presenceLogs f)
{- All the uuid-based logs stored in the git-annex branch. -}
uuidBasedLogs :: [FilePath]
uuidBasedLogs =
[ uuidLog
, remoteLog
, trustLog
, groupLog
, preferredContentLog
]
{- All the ways to get a key from a presence log file -}
presenceLogs :: FilePath -> [Maybe Key]
presenceLogs f =
[ urlLogFileKey f
, locationLogFileKey f
]
uuidLog :: FilePath
uuidLog = "uuid.log"
remoteLog :: FilePath
remoteLog = "remote.log"
trustLog :: FilePath
trustLog = "trust.log"
groupLog :: FilePath
groupLog = "group.log"
preferredContentLog :: FilePath
preferredContentLog = "preferred-content.log"
{- The pathname of the location log file for a given key. -}
locationLogFile :: Key -> String
locationLogFile key = hashDirLower key ++ keyFile key ++ ".log"
{- Converts a pathname into a key if it's a location log. -}
locationLogFileKey :: FilePath -> Maybe Key
locationLogFileKey path
| ["remote", "web"] `isPrefixOf` splitDirectories dir = Nothing
| ext == ".log" = fileKey base
| otherwise = Nothing
where
(dir, file) = splitFileName path
(base, ext) = splitAt (length file - 4) file
{- The filename of the url log for a given key. -}
urlLogFile :: Key -> FilePath
urlLogFile key = hashDirLower key </> keyFile key ++ urlLogExt
{- Old versions stored the urls elsewhere. -}
oldurlLogs :: Key -> [FilePath]
oldurlLogs key =
[ "remote/web" </> hashDirLower key </> key2file key ++ ".log"
, "remote/web" </> hashDirLower key </> keyFile key ++ ".log"
]
urlLogExt :: String
urlLogExt = ".log.web"
{- Converts a url log file into a key.
- (Does not work on oldurlLogs.) -}
urlLogFileKey :: FilePath -> Maybe Key
urlLogFileKey path
| ext == urlLogExt = fileKey base
| otherwise = Nothing
where
file = takeFileName path
(base, ext) = splitAt (length file - extlen) file
extlen = length urlLogExt
{- Does not work on oldurllogs. -}
isUrlLog :: FilePath -> Bool
isUrlLog file = urlLogExt `isSuffixOf` file
prop_logs_sane :: Key -> Bool
prop_logs_sane dummykey = all id
[ isNothing (getLogVariety "unknown")
, expect isUUIDBasedLog (getLogVariety uuidLog)
, expect isPresenceLog (getLogVariety $ locationLogFile dummykey)
, expect isPresenceLog (getLogVariety $ urlLogFile dummykey)
]
where
expect = maybe False
isUUIDBasedLog UUIDBasedLog = True
isUUIDBasedLog _ = False
isPresenceLog (PresenceLog k) = k == dummykey
isPresenceLog _ = False

View file

@ -21,16 +21,13 @@ import qualified Data.Set as S
import Data.Time.Clock.POSIX
import Common.Annex
import Logs
import qualified Annex.Branch
import qualified Annex
import Logs.UUIDBased
import Types.Group
import Types.StandardGroups
{- Filename of group.log. -}
groupLog :: FilePath
groupLog = "group.log"
{- Returns the groups of a given repo UUID. -}
lookupGroups :: UUID -> Annex (S.Set Group)
lookupGroups u = (fromMaybe S.empty . M.lookup u) . groupsByUUID <$> groupMap

View file

@ -20,12 +20,11 @@ module Logs.Location (
loggedLocations,
loggedKeys,
loggedKeysFor,
logFile,
logFileKey
) where
import Common.Annex
import qualified Annex.Branch
import Logs
import Logs.Presence
import Annex.UUID
@ -37,19 +36,19 @@ logStatus key status = do
{- Log a change in the presence of a key's value in a repository. -}
logChange :: Key -> UUID -> LogStatus -> Annex ()
logChange key (UUID u) s = addLog (logFile key) =<< logNow s u
logChange key (UUID u) s = addLog (locationLogFile key) =<< logNow s u
logChange _ NoUUID _ = noop
{- Returns a list of repository UUIDs that, according to the log, have
- the value of a key.
-}
loggedLocations :: Key -> Annex [UUID]
loggedLocations key = map toUUID <$> (currentLog . logFile) key
loggedLocations key = map toUUID <$> (currentLog . locationLogFile) key
{- Finds all keys that have location log information.
- (There may be duplicate keys in the list.) -}
loggedKeys :: Annex [Key]
loggedKeys = mapMaybe (logFileKey . takeFileName) <$> Annex.Branch.files
loggedKeys = mapMaybe locationLogFileKey <$> Annex.Branch.files
{- Finds all keys that have location log information indicating
- they are present for the specified repository. -}
@ -62,15 +61,3 @@ loggedKeysFor u = filterM isthere =<< loggedKeys
us <- loggedLocations k
let !there = u `elem` us
return there
{- The filename of the log file for a given key. -}
logFile :: Key -> String
logFile key = hashDirLower key ++ keyFile key ++ ".log"
{- Converts a log filename into a key. -}
logFileKey :: FilePath -> Maybe Key
logFileKey file
| ext == ".log" = fileKey base
| otherwise = Nothing
where
(base, ext) = splitAt (length file - 4) file

View file

@ -24,6 +24,7 @@ import Data.Time.Clock.POSIX
import Common.Annex
import qualified Annex.Branch
import qualified Annex
import Logs
import Logs.UUIDBased
import Limit
import qualified Utility.Matcher
@ -35,10 +36,6 @@ import Logs.Group
import Logs.Remote
import Types.StandardGroups
{- Filename of preferred-content.log. -}
preferredContentLog :: FilePath
preferredContentLog = "preferred-content.log"
{- Changes the preferred content configuration of a remote. -}
preferredContentSet :: UUID -> String -> Annex ()
preferredContentSet uuid@(UUID _) val = do

View file

@ -12,36 +12,18 @@
-}
module Logs.Presence (
LogStatus(..),
LogLine(LogLine),
module X,
addLog,
readLog,
getLog,
parseLog,
showLog,
logNow,
compactLog,
currentLog,
prop_parse_show_log,
currentLog
) where
import Data.Time.Clock.POSIX
import Data.Time
import System.Locale
import qualified Data.Map as M
import Logs.Presence.Pure as X
import Common.Annex
import qualified Annex.Branch
import Utility.QuickCheck
data LogLine = LogLine {
date :: POSIXTime,
status :: LogStatus,
info :: String
} deriving (Eq, Show)
data LogStatus = InfoPresent | InfoMissing
deriving (Eq, Show, Bounded, Enum)
addLog :: FilePath -> LogLine -> Annex ()
addLog file line = Annex.Branch.change file $ \s ->
@ -52,29 +34,6 @@ addLog file line = Annex.Branch.change file $ \s ->
readLog :: FilePath -> Annex [LogLine]
readLog = parseLog <$$> Annex.Branch.get
{- Parses a log file. Unparseable lines are ignored. -}
parseLog :: String -> [LogLine]
parseLog = mapMaybe parseline . lines
where
parseline l = LogLine
<$> (utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" d)
<*> parsestatus s
<*> pure rest
where
(d, pastd) = separate (== ' ') l
(s, rest) = separate (== ' ') pastd
parsestatus "1" = Just InfoPresent
parsestatus "0" = Just InfoMissing
parsestatus _ = Nothing
{- Generates a log file. -}
showLog :: [LogLine] -> String
showLog = unlines . map genline
where
genline (LogLine d s i) = unwords [show d, genstatus s, i]
genstatus InfoPresent = "1"
genstatus InfoMissing = "0"
{- Generates a new LogLine with the current date. -}
logNow :: LogStatus -> String -> Annex LogLine
logNow s i = do
@ -84,39 +43,3 @@ logNow s i = do
{- Reads a log and returns only the info that is still in effect. -}
currentLog :: FilePath -> Annex [String]
currentLog file = map info . filterPresent <$> readLog file
{- Given a log, returns only the info that is are still in effect. -}
getLog :: String -> [String]
getLog = map info . filterPresent . parseLog
{- Returns the info from LogLines that are in effect. -}
filterPresent :: [LogLine] -> [LogLine]
filterPresent = filter (\l -> InfoPresent == status l) . compactLog
{- Compacts a set of logs, returning a subset that contains the current
- status. -}
compactLog :: [LogLine] -> [LogLine]
compactLog = M.elems . foldr mapLog M.empty
type LogMap = M.Map String LogLine
{- Inserts a log into a map of logs, if the log has better (ie, newer)
- information than the other logs in the map -}
mapLog :: LogLine -> LogMap -> LogMap
mapLog l m
| better = M.insert i l m
| otherwise = m
where
better = maybe True newer $ M.lookup i m
newer l' = date l' <= date l
i = info l
instance Arbitrary LogLine where
arbitrary = LogLine
<$> arbitrary
<*> elements [minBound..maxBound]
<*> arbitrary `suchThat` ('\n' `notElem`)
prop_parse_show_log :: [LogLine] -> Bool
prop_parse_show_log l = parseLog (showLog l) == l

84
Logs/Presence/Pure.hs Normal file
View file

@ -0,0 +1,84 @@
{- git-annex presence log, pure operations
-
- Copyright 2010-2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Presence.Pure where
import Data.Time.Clock.POSIX
import Data.Time
import System.Locale
import qualified Data.Map as M
import Common.Annex
import Utility.QuickCheck
data LogLine = LogLine {
date :: POSIXTime,
status :: LogStatus,
info :: String
} deriving (Eq, Show)
data LogStatus = InfoPresent | InfoMissing
deriving (Eq, Show, Bounded, Enum)
{- Parses a log file. Unparseable lines are ignored. -}
parseLog :: String -> [LogLine]
parseLog = mapMaybe parseline . lines
where
parseline l = LogLine
<$> (utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" d)
<*> parsestatus s
<*> pure rest
where
(d, pastd) = separate (== ' ') l
(s, rest) = separate (== ' ') pastd
parsestatus "1" = Just InfoPresent
parsestatus "0" = Just InfoMissing
parsestatus _ = Nothing
{- Generates a log file. -}
showLog :: [LogLine] -> String
showLog = unlines . map genline
where
genline (LogLine d s i) = unwords [show d, genstatus s, i]
genstatus InfoPresent = "1"
genstatus InfoMissing = "0"
{- Given a log, returns only the info that is are still in effect. -}
getLog :: String -> [String]
getLog = map info . filterPresent . parseLog
{- Returns the info from LogLines that are in effect. -}
filterPresent :: [LogLine] -> [LogLine]
filterPresent = filter (\l -> InfoPresent == status l) . compactLog
{- Compacts a set of logs, returning a subset that contains the current
- status. -}
compactLog :: [LogLine] -> [LogLine]
compactLog = M.elems . foldr mapLog M.empty
type LogMap = M.Map String LogLine
{- Inserts a log into a map of logs, if the log has better (ie, newer)
- information than the other logs in the map -}
mapLog :: LogLine -> LogMap -> LogMap
mapLog l m
| better = M.insert i l m
| otherwise = m
where
better = maybe True newer $ M.lookup i m
newer l' = date l' <= date l
i = info l
instance Arbitrary LogLine where
arbitrary = LogLine
<$> arbitrary
<*> elements [minBound..maxBound]
<*> arbitrary `suchThat` ('\n' `notElem`)
prop_parse_show_log :: [LogLine] -> Bool
prop_parse_show_log l = parseLog (showLog l) == l

View file

@ -25,12 +25,9 @@ import Data.Char
import Common.Annex
import qualified Annex.Branch
import Types.Remote
import Logs
import Logs.UUIDBased
{- Filename of remote.log. -}
remoteLog :: FilePath
remoteLog = "remote.log"
{- Adds or updates a remote's config in the log. -}
configSet :: UUID -> RemoteConfig -> Annex ()
configSet u c = do

87
Logs/Transitions.hs Normal file
View file

@ -0,0 +1,87 @@
{- git-annex transitions log
-
- This is used to record transitions that have been performed on the
- git-annex branch, and when the transition was first started.
-
- We can quickly detect when the local branch has already had an transition
- done that is listed in the remote branch by checking that the local
- branch contains the same transition, with the same or newer start time.
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Transitions where
import Data.Time.Clock.POSIX
import Data.Time
import System.Locale
import qualified Data.Set as S
import Common.Annex
transitionsLog :: FilePath
transitionsLog = "transitions.log"
data Transition
= ForgetGitHistory
| ForgetDeadRemotes
deriving (Show, Ord, Eq, Read)
data TransitionLine = TransitionLine
{ transitionStarted :: POSIXTime
, transition :: Transition
} deriving (Show, Ord, Eq)
type Transitions = S.Set TransitionLine
describeTransition :: Transition -> String
describeTransition ForgetGitHistory = "forget git history"
describeTransition ForgetDeadRemotes = "forget dead remotes"
noTransitions :: Transitions
noTransitions = S.empty
addTransition :: POSIXTime -> Transition -> Transitions -> Transitions
addTransition ts t = S.insert $ TransitionLine ts t
showTransitions :: Transitions -> String
showTransitions = unlines . map showTransitionLine . S.elems
{- If the log contains new transitions we don't support, returns Nothing. -}
parseTransitions :: String -> Maybe Transitions
parseTransitions = check . map parseTransitionLine . lines
where
check l
| all isJust l = Just $ S.fromList $ catMaybes l
| otherwise = Nothing
parseTransitionsStrictly :: String -> String -> Transitions
parseTransitionsStrictly source = fromMaybe badsource . parseTransitions
where
badsource = error $ "unknown transitions listed in " ++ source ++ "; upgrade git-annex!"
showTransitionLine :: TransitionLine -> String
showTransitionLine (TransitionLine ts t) = unwords [show t, show ts]
parseTransitionLine :: String -> Maybe TransitionLine
parseTransitionLine s = TransitionLine <$> pdate ds <*> readish ts
where
ws = words s
ts = Prelude.head ws
ds = unwords $ Prelude.tail ws
pdate = parseTime defaultTimeLocale "%s%Qs" >=*> utcTimeToPOSIXSeconds
combineTransitions :: [Transitions] -> Transitions
combineTransitions = S.unions
transitionList :: Transitions -> [Transition]
transitionList = map transition . S.elems
{- Typically ran with Annex.Branch.change, but we can't import Annex.Branch
- here since it depends on this module. -}
recordTransitions :: (FilePath -> (String -> String) -> Annex ()) -> Transitions -> Annex ()
recordTransitions changer t = do
changer transitionsLog $
showTransitions . S.union t . parseTransitionsStrictly "local"

View file

@ -6,6 +6,7 @@
-}
module Logs.Trust (
module X,
trustLog,
TrustLevel(..),
trustGet,
@ -16,8 +17,6 @@ module Logs.Trust (
lookupTrust,
trustMapLoad,
trustMapRaw,
prop_parse_show_TrustLog,
) where
import qualified Data.Map as M
@ -27,13 +26,11 @@ import Common.Annex
import Types.TrustLevel
import qualified Annex.Branch
import qualified Annex
import Logs
import Logs.UUIDBased
import Remote.List
import qualified Types.Remote
{- Filename of trust.log. -}
trustLog :: FilePath
trustLog = "trust.log"
import Logs.Trust.Pure as X
{- Returns a list of UUIDs that the trustLog indicates have the
- specified trust level.
@ -97,26 +94,4 @@ trustMapLoad = do
{- Does not include forcetrust or git config values, just those from the
- log file. -}
trustMapRaw :: Annex TrustMap
trustMapRaw = simpleMap . parseLog (Just . parseTrustLog)
<$> Annex.Branch.get trustLog
{- The trust.log used to only list trusted repos, without a field for the
- trust status, which is why this defaults to Trusted. -}
parseTrustLog :: String -> TrustLevel
parseTrustLog s = maybe Trusted parse $ headMaybe $ words s
where
parse "1" = Trusted
parse "0" = UnTrusted
parse "X" = DeadTrusted
parse _ = SemiTrusted
showTrustLog :: TrustLevel -> String
showTrustLog Trusted = "1"
showTrustLog UnTrusted = "0"
showTrustLog DeadTrusted = "X"
showTrustLog SemiTrusted = "?"
prop_parse_show_TrustLog :: Bool
prop_parse_show_TrustLog = all check [minBound .. maxBound]
where
check l = parseTrustLog (showTrustLog l) == l
trustMapRaw = calcTrustMap <$> Annex.Branch.get trustLog

36
Logs/Trust/Pure.hs Normal file
View file

@ -0,0 +1,36 @@
{- git-annex trust log, pure operations
-
- Copyright 2010-2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Trust.Pure where
import Common.Annex
import Types.TrustLevel
import Logs.UUIDBased
calcTrustMap :: String -> TrustMap
calcTrustMap = simpleMap . parseLog (Just . parseTrustLog)
{- The trust.log used to only list trusted repos, without a field for the
- trust status, which is why this defaults to Trusted. -}
parseTrustLog :: String -> TrustLevel
parseTrustLog s = maybe Trusted parse $ headMaybe $ words s
where
parse "1" = Trusted
parse "0" = UnTrusted
parse "X" = DeadTrusted
parse _ = SemiTrusted
showTrustLog :: TrustLevel -> String
showTrustLog Trusted = "1"
showTrustLog UnTrusted = "0"
showTrustLog DeadTrusted = "X"
showTrustLog SemiTrusted = "?"
prop_parse_show_TrustLog :: Bool
prop_parse_show_TrustLog = all check [minBound .. maxBound]
where
check l = parseTrustLog (showTrustLog l) == l

View file

@ -28,13 +28,10 @@ import Types.UUID
import Common.Annex
import qualified Annex
import qualified Annex.Branch
import Logs
import Logs.UUIDBased
import qualified Annex.UUID
{- Filename of uuid.log. -}
uuidLog :: FilePath
uuidLog = "uuid.log"
{- Records a description for a uuid in the log. -}
describeUUID :: UUID -> String -> Annex ()
describeUUID uuid desc = do

View file

@ -11,8 +11,6 @@ module Logs.Web (
getUrls,
setUrlPresent,
setUrlMissing,
urlLog,
urlLogKey,
knownUrls,
Downloader(..),
getDownloader,
@ -22,9 +20,9 @@ module Logs.Web (
import qualified Data.ByteString.Lazy.Char8 as L
import Common.Annex
import Logs
import Logs.Presence
import Logs.Location
import Types.Key
import qualified Annex.Branch
import Annex.CatFile
import qualified Git
@ -36,35 +34,9 @@ type URLString = String
webUUID :: UUID
webUUID = UUID "00000000-0000-0000-0000-000000000001"
urlLogExt :: String
urlLogExt = ".log.web"
urlLog :: Key -> FilePath
urlLog key = hashDirLower key </> keyFile key ++ urlLogExt
{- Converts a url log file into a key.
- (Does not work on oldurlLogs.) -}
urlLogKey :: FilePath -> Maybe Key
urlLogKey file
| ext == urlLogExt = fileKey base
| otherwise = Nothing
where
(base, ext) = splitAt (length file - extlen) file
extlen = length urlLogExt
isUrlLog :: FilePath -> Bool
isUrlLog file = urlLogExt `isSuffixOf` file
{- Used to store the urls elsewhere. -}
oldurlLogs :: Key -> [FilePath]
oldurlLogs key =
[ "remote/web" </> hashDirLower key </> key2file key ++ ".log"
, "remote/web" </> hashDirLower key </> keyFile key ++ ".log"
]
{- Gets all urls that a key might be available from. -}
getUrls :: Key -> Annex [URLString]
getUrls key = go $ urlLog key : oldurlLogs key
getUrls key = go $ urlLogFile key : oldurlLogs key
where
go [] = return []
go (l:ls) = do
@ -77,13 +49,13 @@ setUrlPresent :: Key -> URLString -> Annex ()
setUrlPresent key url = do
us <- getUrls key
unless (url `elem` us) $ do
addLog (urlLog key) =<< logNow InfoPresent url
addLog (urlLogFile key) =<< logNow InfoPresent url
-- update location log to indicate that the web has the key
logChange key webUUID InfoPresent
setUrlMissing :: Key -> URLString -> Annex ()
setUrlMissing key url = do
addLog (urlLog key) =<< logNow InfoMissing url
addLog (urlLogFile key) =<< logNow InfoMissing url
whenM (null <$> getUrls key) $
logChange key webUUID InfoMissing

View file

@ -33,6 +33,7 @@ import qualified Types.KeySource
import qualified Types.Backend
import qualified Types.TrustLevel
import qualified Types
import qualified Logs
import qualified Logs.UUIDBased
import qualified Logs.Trust
import qualified Logs.Remote
@ -115,6 +116,7 @@ quickcheck =
, check "prop_idempotent_key_encode" Types.Key.prop_idempotent_key_encode
, check "prop_idempotent_shellEscape" Utility.SafeCommand.prop_idempotent_shellEscape
, check "prop_idempotent_shellEscape_multiword" Utility.SafeCommand.prop_idempotent_shellEscape_multiword
, check "prop_logs_sane" Logs.prop_logs_sane
, check "prop_idempotent_configEscape" Logs.Remote.prop_idempotent_configEscape
, check "prop_parse_show_Config" Logs.Remote.prop_parse_show_Config
, check "prop_parentDir_basics" Utility.Path.prop_parentDir_basics

View file

@ -12,9 +12,9 @@ import qualified Git
import qualified Git.Command
import qualified Git.Ref
import qualified Annex.Branch
import Logs.Location
import Annex.Content
import Utility.Tmp
import Logs
olddir :: Git.Repo -> FilePath
olddir g
@ -47,7 +47,7 @@ upgrade = do
e <- liftIO $ doesDirectoryExist old
when e $ do
mapM_ (\(k, f) -> inject f $ logFile k) =<< locationLogs
mapM_ (\(k, f) -> inject f $ locationLogFile k) =<< locationLogs
mapM_ (\f -> inject f f) =<< logFiles old
saveState False
@ -73,7 +73,7 @@ locationLogs = do
where
tryDirContents d = catchDefaultIO [] $ dirContents d
islogfile f = maybe Nothing (\k -> Just (k, f)) $
logFileKey $ takeFileName f
locationLogFileKey f
inject :: FilePath -> FilePath -> Annex ()
inject source dest = do

View file

@ -91,6 +91,12 @@ massReplace vs = go [] vs
go (replacement:acc) vs (drop (length val) s)
| otherwise = go acc rest s
{- First item in the list that is not Nothing. -}
firstJust :: Eq a => [Maybe a] -> Maybe a
firstJust ms = case dropWhile (== Nothing) ms of
[] -> Nothing
(md:_) -> md
{- Given two orderings, returns the second if the first is EQ and returns
- the first otherwise.
-

9
debian/changelog vendored
View file

@ -1,12 +1,17 @@
git-annex (4.20130828) UNRELEASED; urgency=low
* forget: New command, causes git-annex branch history to be forgotten
in a way that will spread to other clones of the repository.
(As long as they're running this version or newer of git-annex.)
* forget --drop-dead: Completely removes mentions of repositories that
have been marked as dead from the git-annex branch.
* sync, assistant: Force push of the git-annex branch. Necessary
to ensure it gets pushed to remotes after being rewritten by forget.
* importfeed: Also ignore transient problems with downloading content
from feeds.
* Honor core.sharedrepository when receiving and adding files in direct
mode.
-- Joey Hess <joeyh@debian.org> Tue, 03 Sep 2013 14:31:45 -0400
git-annex (4.20130827) unstable; urgency=low
* Youtube support! (And 53 other video hosts). When quvi is installed,

View file

@ -479,6 +479,23 @@ subdirectories).
Upgrades the repository to current layout.
* forget
Causes the git-annex branch to be rewritten, throwing away historical
data about past locations of files. The resulting branch will use less
space, but `git annex log` will not be able to show where
files used to be located.
To also prune references to repositories that have been marked as dead,
specify --drop-dead.
When this rewritten branch is merged into other clones of
the repository, git-annex will automatically perform the same rewriting
to their local git-annex branches. So the forgetfulness will automatically
propigate out from its starting point until all repositories running
git-annex have forgotten their old history. (You may need to force
git to push the branch to any git repositories not running git-annex.
# QUERY COMMANDS
* version