diff --git a/Backend/File.hs b/Backend/File.hs index f88bb7c704..0b1cdd8e52 100644 --- a/Backend/File.hs +++ b/Backend/File.hs @@ -16,6 +16,7 @@ module Backend.File (backend, checkKey) where import Control.Monad.State import System.Directory +import Data.List (intersect) import TypeInternals import LocationLog @@ -91,11 +92,18 @@ checkRemoveKey key numcopiesM = do if force || numcopiesM == Just 0 then return True else do + g <- Annex.gitRepo + locations <- liftIO $ keyLocations g key + trusted <- getTrusted + let trustedlocations = intersect locations trusted remotes <- Remotes.keyPossibilities key + untrustedremotes <- reposWithoutUUID remotes trusted numcopies <- getNumCopies numcopiesM - if numcopies > length remotes - then notEnoughCopies numcopies (length remotes) [] - else findcopies numcopies 0 remotes [] + -- Trusted locations count toward numcopies without being checked, so the early test must include them. + let possible = sum [length trustedlocations, length untrustedremotes] + if numcopies > possible + then notEnoughCopies numcopies possible [] + else findcopies numcopies (length trustedlocations) untrustedremotes [] where findcopies need have [] bad | have >= need = return True diff --git a/CmdLine.hs b/CmdLine.hs index cb164a6ab2..7eab0a7e28 100644 --- a/CmdLine.hs +++ b/CmdLine.hs @@ -34,6 +34,8 @@ import qualified Command.Lock import qualified Command.PreCommit import qualified Command.Find import qualified Command.Uninit +import qualified Command.Trust +import qualified Command.Untrust subCmds :: [SubCommand] subCmds = @@ -61,6 +63,10 @@ subCmds = "de-initialize git-annex and clean out repository" , SubCommand "pre-commit" path Command.PreCommit.seek "run by git pre-commit hook" + , SubCommand "trust" remote Command.Trust.seek + "trust a repository" + , SubCommand "untrust" remote Command.Untrust.seek + "do not trust a repository" , SubCommand "fromkey" key Command.FromKey.seek "adds a file using a specific key" , SubCommand "dropkey" key Command.DropKey.seek @@ -84,6 +90,7 @@ subCmds = key = "KEY ..." desc = "DESCRIPTION" number = "NUMBER ..." + remote = "REMOTE ..." 
nothing = "" -- Each dashed command-line option results in generation of an action diff --git a/Remotes.hs b/Remotes.hs index cf49b624b4..725348a6a3 100644 --- a/Remotes.hs +++ b/Remotes.hs @@ -12,6 +12,7 @@ module Remotes ( inAnnex, same, commandLineRemote, + byName, copyFromRemote, copyToRemote, runCmd @@ -156,6 +157,11 @@ commandLineRemote = do fromName <- Annex.flagGet "fromrepository" toName <- Annex.flagGet "torepository" let name = if null fromName then toName else fromName + byName name + +{- Looks up a remote by name. -} +byName :: String -> Annex Git.Repo +byName name = do when (null name) $ error "no remote specified" g <- Annex.gitRepo let match = filter (\r -> name == Git.repoRemoteName r) $ diff --git a/UUID.hs b/UUID.hs index 41a35327d9..3e43c80612 100644 --- a/UUID.hs +++ b/UUID.hs @@ -14,9 +14,13 @@ module UUID ( prepUUID, genUUID, reposByUUID, + reposWithoutUUID, prettyPrintUUIDs, describeUUID, - uuidLog + uuidLog, + trustLog, + getTrusted, + setTrusted ) where import Control.Monad.State @@ -24,9 +28,7 @@ import Data.Maybe import Data.List import System.Cmd.Utils import System.IO -import System.Directory import qualified Data.Map as M -import System.Posix.Process import qualified GitRepo as Git import Types @@ -85,6 +87,14 @@ reposByUUID repos uuids = filterM match repos u <- getUUID r return $ isJust $ elemIndex u uuids +{- Filters a list of repos to ones that do not have the listed UUIDs. 
-} +reposWithoutUUID :: [Git.Repo] -> [UUID] -> Annex [Git.Repo] +reposWithoutUUID repos uuids = filterM unmatch repos + where + unmatch r = do + u <- getUUID r + return $ u `notElem` uuids + {- Pretty-prints a list of UUIDs -} prettyPrintUUIDs :: [UUID] -> Annex String prettyPrintUUIDs uuids = do @@ -103,11 +113,7 @@ describeUUID uuid desc = do m <- uuidMap let m' = M.insert uuid desc m logfile <- uuidLog - pid <- liftIO $ getProcessID - let tmplogfile = logfile ++ ".tmp" ++ show pid - liftIO $ createDirectoryIfMissing True (parentDir logfile) - liftIO $ writeFile tmplogfile $ serialize m' - liftIO $ renameFile tmplogfile logfile + liftIO $ safeWriteFile logfile (serialize m') where serialize m = unlines $ map (\(u, d) -> u++" "++d) $ M.toList m @@ -125,7 +131,28 @@ uuidMap = do ignoreerror _ = return "" {- Filename of uuid.log. -} -uuidLog :: Annex String +uuidLog :: Annex FilePath uuidLog = do g <- Annex.gitRepo return $ gitStateDir g ++ "uuid.log" + +{- Filename of trust.log. -} +trustLog :: Annex FilePath +trustLog = do + g <- Annex.gitRepo + return $ gitStateDir g ++ "trust.log" + +{- List of trusted UUIDs: the first word of each line of trust.log. Blank lines are skipped, and an unreadable log yields an empty list. -} +getTrusted :: Annex [UUID] +getTrusted = do + logfile <- trustLog + s <- liftIO $ catch (readFile logfile) ignoreerror + return $ concatMap (take 1 . words) $ lines s + where + ignoreerror _ = return "" + +{- Changes the list of trusted UUIDs. -} +setTrusted :: [UUID] -> Annex () +setTrusted u = do + logfile <- trustLog + liftIO $ safeWriteFile logfile $ unlines u diff --git a/Utility.hs b/Utility.hs index 2447c95a0b..3a6c757515 100644 --- a/Utility.hs +++ b/Utility.hs @@ -15,7 +15,8 @@ module Utility ( boolSystem, shellEscape, unsetFileMode, - readMaybe + readMaybe, + safeWriteFile ) where import System.IO @@ -139,3 +140,12 @@ readMaybe :: (Read a) => String -> Maybe a readMaybe s = case reads s of ((x,_):_) -> Just x _ -> Nothing + +{- Writes a file using a temp file that is renamed atomically into place. 
-} +safeWriteFile :: FilePath -> String -> IO () +safeWriteFile file content = do + pid <- getProcessID + let tmpfile = file ++ ".tmp" ++ show pid + createDirectoryIfMissing True (parentDir file) + writeFile tmpfile content + renameFile tmpfile file diff --git a/debian/changelog b/debian/changelog index dba343deff..9be8b37fd0 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,8 @@ git-annex (0.15) UNRELEASED; urgency=low * Support scp-style urls for remotes (host:path). + * Add trust and untrust subcommands, to allow configuring remotes + that are trusted to retain files without explicit checking. -- Joey Hess Tue, 28 Dec 2010 13:13:20 -0400 diff --git a/doc/copies.mdwn b/doc/copies.mdwn index 165e54b340..5b3cbf5154 100644 --- a/doc/copies.mdwn +++ b/doc/copies.mdwn @@ -11,7 +11,8 @@ setting in `.gitattributes` files. `git annex drop` attempts to check with other git remotes, to check that N copies of the file exist. If enough repositories cannot be verified to have -it, it will retain the file content to avoid data loss. +it, it will retain the file content to avoid data loss. Note that +[[trusted_remotes|trust]] are not explicitly checked. For example, consider three repositories: Server, Laptop, and USB. Both Server and USB have a copy of a file, and N=1. If on Laptop, you `git annex get diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 71a4889ac7..caef49d977 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -171,6 +171,15 @@ Many git-annex subcommands will stage changes for later `git commit` by you. This is meant to be called from git's pre-commit hook. `git annex init` automatically creates a pre-commit hook using this. +* trust [repository ...] + + Records that a repository is [[trusted]] to not unexpectedly lose content. + Use with care. + +* untrust [repository ...] + + Undoes a trust command. 
+ * fromkey file This can be used to maually set up a file to link to a specified key @@ -333,7 +342,9 @@ These files are used by git-annex, in your git repository: available. Annexed files in your git repository symlink to that content. `.git-annex/uuid.log` is used to map between repository UUID and -decscriptions. You may edit it. +descriptions. + +`.git-annex/trust.log` is used to list the UUIDs of trusted repositories. `.git-annex/*.log` is where git-annex records its content tracking information. These files should be committed to git. diff --git a/doc/location_tracking.mdwn b/doc/location_tracking.mdwn index a7d5c150b1..3791029f8f 100644 --- a/doc/location_tracking.mdwn +++ b/doc/location_tracking.mdwn @@ -26,3 +26,6 @@ descriptions to help you with finding them: Try making some of these repositories available: c0a28e06-d7ef-11df-885c-775af44f8882 -- USB archive drive 1 e1938fee-d95b-11df-96cc-002170d25c55 + +In certain cases you may want to configure git-annex to [[trust]] +that location tracking information is always correct for a repository. diff --git a/doc/trust.mdwn b/doc/trust.mdwn new file mode 100644 index 0000000000..b04a112ecf --- /dev/null +++ b/doc/trust.mdwn @@ -0,0 +1,20 @@ +Normally, git-annex does not fully trust its stored [[location_tracking]] +information. When removing content, it will directly check +that other repositories have [[copies]]. + +Generally that explicit checking is a good idea. Consider that the current +[[location_tracking]] information for a remote may not yet have propagated +out. Or, a remote may have suffered a catastrophic loss of data, or itself +been lost. + +Sometimes though, you may have reasons to trust the location tracking +information for a remote repository. For example, it may be an offline +archival drive, from which you rarely or never remove content. Deciding +when it makes sense to trust the tracking info is up to you. 
+ +One way to handle this is just to use `--force` when a command cannot +access a remote you trust. + +Another option is to configure which remotes you trust with the +`git annex trust` command, or by manually adding the UUIDs of trusted remotes +to `.git-annex/trust.log`. diff --git a/doc/walkthrough.mdwn b/doc/walkthrough.mdwn index 9c8106d513..b486a4b1ff 100644 --- a/doc/walkthrough.mdwn +++ b/doc/walkthrough.mdwn @@ -134,7 +134,7 @@ you'll see something like this. (Use --force to override this check, or adjust annex.numcopies.) failed -Here you might --force it to drop `important_file` if you trust your backup. +Here you might --force it to drop `important_file` if you [[trust]] your backup. But `other.iso` looks to have never been copied to anywhere else, so if it's something you want to hold onto, you'd need to transfer it to some other repository before dropping it.