diff --git a/Annex.hs b/Annex.hs index 5e08c7acf9..642cbb9f73 100644 --- a/Annex.hs +++ b/Annex.hs @@ -26,19 +26,21 @@ module Annex ( fromRepo, ) where -import Control.Monad.State +import Control.Monad.State.Strict import System.Posix.Types (Fd) import Common import qualified Git import qualified Git.Config import Git.CatFile +import Git.CheckAttr import qualified Git.Queue import Types.Backend import qualified Types.Remote import Types.Crypto import Types.BranchState import Types.TrustLevel +import Utility.State import qualified Utility.Matcher import qualified Data.Map as M @@ -73,6 +75,7 @@ data AnnexState = AnnexState , auto :: Bool , branchstate :: BranchState , catfilehandle :: Maybe CatFileHandle + , checkattrhandle :: Maybe CheckAttrHandle , forcebackend :: Maybe String , forcenumcopies :: Maybe Int , limit :: Matcher (FilePath -> Annex Bool) @@ -96,6 +99,7 @@ newState gitrepo = AnnexState , auto = False , branchstate = startBranchState , catfilehandle = Nothing + , checkattrhandle = Nothing , forcebackend = Nothing , forcenumcopies = Nothing , limit = Left [] @@ -117,18 +121,6 @@ run s a = runStateT (runAnnex a) s eval :: AnnexState -> Annex a -> IO a eval s a = evalStateT (runAnnex a) s -{- Gets a value from the internal state, selected by the passed value - - constructor. -} -getState :: (AnnexState -> a) -> Annex a -getState = gets - -{- Applies a state mutation function to change the internal state. - - - - Example: changeState $ \s -> s { output = QuietOutput } - -} -changeState :: (AnnexState -> AnnexState) -> Annex () -changeState = modify - {- Sets a flag to True -} setFlag :: String -> Annex () setFlag flag = changeState $ \s -> diff --git a/Annex/Branch.hs b/Annex/Branch.hs index b2b1ed3e40..f20c87b4a3 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -1,6 +1,6 @@ {- management of the git-annex branch - - - Copyright 2011 Joey Hess + - Copyright 2011-2012 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -32,7 +32,7 @@ import qualified Git.Command import qualified Git.Ref import qualified Git.Branch import qualified Git.UnionMerge -import qualified Git.HashObject +import Git.HashObject import qualified Git.Index import Annex.CatFile @@ -190,7 +190,7 @@ commit message = whenM journalDirty $ lockJournal $ do {- Commits the staged changes in the index to the branch. - - Ensures that the branch's index file is first updated to the state - - of the brannch at branchref, before running the commit action. This + - of the branch at branchref, before running the commit action. This - is needed because the branch may have had changes pushed to it, that - are not yet reflected in the index. - @@ -307,13 +307,14 @@ stageJournal = do fs <- getJournalFiles g <- gitRepo withIndex $ liftIO $ do - let dir = gitAnnexJournalDir g - let paths = map (dir ) fs - (shas, cleanup) <- Git.HashObject.hashFiles paths g - Git.UnionMerge.update_index g $ - index_lines shas (map fileJournal fs) - cleanup - mapM_ removeFile paths + h <- hashObjectStart g + Git.UnionMerge.stream_update_index g + [genstream (gitAnnexJournalDir g) h fs] + hashObjectStop h where - index_lines shas = map genline . zip shas - genline (sha, file) = Git.UnionMerge.update_index_line sha file + genstream dir h fs streamer = forM_ fs $ \file -> do + let path = dir file + sha <- hashFile h path + streamer $ Git.UnionMerge.update_index_line + sha (fileJournal file) + removeFile path diff --git a/Annex/CheckAttr.hs b/Annex/CheckAttr.hs new file mode 100644 index 0000000000..01779e8136 --- /dev/null +++ b/Annex/CheckAttr.hs @@ -0,0 +1,35 @@ +{- git check-attr interface, with handle automatically stored in the Annex monad + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.CheckAttr ( + checkAttr, + checkAttrHandle +) where + +import Common.Annex +import qualified Git.CheckAttr as Git +import qualified Annex + +{- All gitattributes used by git-annex. -} +annexAttrs :: [Git.Attr] +annexAttrs = + [ "annex.backend" + , "annex.numcopies" + ] + +checkAttr :: Git.Attr -> FilePath -> Annex String +checkAttr attr file = do + h <- checkAttrHandle + liftIO $ Git.checkAttr h attr file + +checkAttrHandle :: Annex Git.CheckAttrHandle +checkAttrHandle = maybe startup return =<< Annex.getState Annex.checkattrhandle + where + startup = do + h <- inRepo $ Git.checkAttrStart annexAttrs + Annex.changeState $ \s -> s { Annex.checkattrhandle = Just h } + return h diff --git a/Annex/Content.hs b/Annex/Content.hs index c21ac405ea..d10370bc9a 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -25,7 +25,6 @@ module Annex.Content ( preseedTmp, ) where -import System.IO.Error (try) import Control.Exception (bracket_) import System.Posix.Types @@ -79,7 +78,7 @@ lockContent key a = do where lock Nothing = return Nothing lock (Just l) = do - v <- try $ setLock l (WriteLock, AbsoluteSeek, 0, 0) + v <- tryIO $ setLock l (WriteLock, AbsoluteSeek, 0, 0) case v of Left _ -> error "content is locked" Right _ -> return $ Just l @@ -291,11 +290,16 @@ getKeysPresent' dir = do let files = concat contents return $ mapMaybe (fileKey . takeFileName) files -{- Things to do to record changes to content. -} -saveState :: Annex () -saveState = do +{- Things to do to record changes to content when shutting down. + - + - It's acceptable to avoid committing changes to the branch, + - especially if performing a short-lived action. + -} +saveState :: Bool -> Annex () +saveState oneshot = do Annex.Queue.flush False - Annex.Branch.commit "update" + unless oneshot $ + Annex.Branch.commit "update" {- Downloads content from any of a list of urls. -} downloadUrl :: [Url.URLString] -> FilePath -> Annex Bool diff --git a/Annex/Journal.hs b/Annex/Journal.hs index 9c5be89b19..34c4d98c88 100644 --- a/Annex/Journal.hs +++ b/Annex/Journal.hs @@ -91,4 +91,4 @@ lockJournal a = do {- Runs an action, catching failure and running something to fix it up, and - retrying if necessary. -} doRedo :: IO a -> IO b -> IO a -doRedo a b = catch a $ const $ b >> a +doRedo a b = catchIO a $ const $ b >> a diff --git a/Annex/Ssh.hs b/Annex/Ssh.hs index c05e236040..184eb92caa 100644 --- a/Annex/Ssh.hs +++ b/Annex/Ssh.hs @@ -11,7 +11,6 @@ module Annex.Ssh ( ) where import qualified Data.Map as M -import System.IO.Error (try) import Common.Annex import Annex.LockPool @@ -72,18 +71,20 @@ sshCleanup = do let lockfile = socket2lock socketfile unlockFile lockfile fd <- liftIO $ openFd lockfile ReadWrite (Just stdFileMode) defaultFileFlags - v <- liftIO $ try $ setLock fd (WriteLock, AbsoluteSeek, 0, 0) + v <- liftIO $ tryIO $ + setLock fd (WriteLock, AbsoluteSeek, 0, 0) case v of Left _ -> return () Right _ -> stopssh socketfile liftIO $ closeFd fd stopssh socketfile = do - (_, params) <- sshInfo $ socket2hostport socketfile + let (host, port) = socket2hostport socketfile + (_, params) <- sshInfo (host, port) _ <- liftIO $ do -- "ssh -O stop" is noisy on stderr even with -q let cmd = unwords $ toCommand $ [ Params "-O stop" - ] ++ params + ] ++ params ++ [Param host] _ <- boolSystem "sh" [ Param "-c" , Param $ "ssh " ++ cmd ++ " >/dev/null 2>/dev/null" @@ -101,7 +102,7 @@ hostport2socket host (Just port) = host ++ "!" ++ show port socket2hostport :: FilePath -> (String, Maybe Integer) socket2hostport socket | null p = (h, Nothing) - | otherwise = (h, readMaybe p) + | otherwise = (h, readish p) where (h, p) = separate (== '!') $ takeFileName socket diff --git a/Backend.hs b/Backend.hs index 003d62bfcd..4c28f1c779 100644 --- a/Backend.hs +++ b/Backend.hs @@ -6,23 +6,21 @@ -} module Backend ( - BackendFile, list, orderedList, genKey, lookupFile, - chooseBackends, + chooseBackend, lookupBackendName, maybeLookupBackendName ) where -import System.IO.Error (try) import System.Posix.Files import Common.Annex import qualified Git.Config -import qualified Git.CheckAttr import qualified Annex +import Annex.CheckAttr import Types.Key import qualified Types.Backend as B @@ -77,7 +75,7 @@ genKey' (b:bs) file = do - by examining what the file symlinks to. -} lookupFile :: FilePath -> Annex (Maybe (Key, Backend)) lookupFile file = do - tl <- liftIO $ try getsymlink + tl <- liftIO $ tryIO getsymlink case tl of Left _ -> return Nothing Right l -> makekey l @@ -94,20 +92,15 @@ lookupFile file = do bname ++ ")" return Nothing -type BackendFile = (Maybe Backend, FilePath) - -{- Looks up the backends that should be used for each file in a list. +{- Looks up the backend that should be used for a file. - That can be configured on a per-file basis in the gitattributes file. -} -chooseBackends :: [FilePath] -> Annex [BackendFile] -chooseBackends fs = Annex.getState Annex.forcebackend >>= go +chooseBackend :: FilePath -> Annex (Maybe Backend) +chooseBackend f = Annex.getState Annex.forcebackend >>= go where - go Nothing = do - pairs <- inRepo $ Git.CheckAttr.lookup "annex.backend" fs - return $ map (\(f,b) -> (maybeLookupBackendName b, f)) pairs - go (Just _) = do - l <- orderedList - return $ map (\f -> (Just $ Prelude.head l, f)) fs + go Nothing = maybeLookupBackendName <$> + checkAttr "annex.backend" f + go (Just _) = Just . Prelude.head <$> orderedList {- Looks up a backend by name. May fail if unknown. -} lookupBackendName :: String -> Backend diff --git a/Backend/URL.hs b/Backend/URL.hs index 6406095ca1..b3411bac5b 100644 --- a/Backend/URL.hs +++ b/Backend/URL.hs @@ -24,5 +24,9 @@ backend = Backend { fsckKey = Nothing } -fromUrl :: String -> Key -fromUrl url = stubKey { keyName = url, keyBackendName = "URL" } +fromUrl :: String -> Maybe Integer -> Key +fromUrl url size = stubKey + { keyName = url + , keyBackendName = "URL" + , keySize = size + } diff --git a/CmdLine.hs b/CmdLine.hs index 29b95d01bd..0bb3459124 100644 --- a/CmdLine.hs +++ b/CmdLine.hs @@ -11,7 +11,6 @@ module CmdLine ( shutdown ) where -import qualified System.IO.Error as IO import qualified Control.Exception as E import Control.Exception (throw) import System.Console.GetOpt @@ -40,7 +39,7 @@ dispatch args cmds commonoptions header getgitrepo = do (actions, state') <- Annex.run state $ do sequence_ flags prepCommand cmd params - tryRun state' cmd $ [startup] ++ actions ++ [shutdown] + tryRun state' cmd $ [startup] ++ actions ++ [shutdown $ cmdoneshot cmd] where (flags, cmd, params) = parseCmd args cmds commonoptions header @@ -72,9 +71,11 @@ tryRun' :: Integer -> Annex.AnnexState -> Command -> [CommandCleanup] -> IO () tryRun' errnum _ cmd [] | errnum > 0 = error $ cmdname cmd ++ ": " ++ show errnum ++ " failed" | otherwise = return () -tryRun' errnum state cmd (a:as) = run >>= handle +tryRun' errnum state cmd (a:as) = do + r <- run + handle $! r where - run = IO.try $ Annex.run state $ do + run = tryIO $ Annex.run state $ do Annex.Queue.flushWhenFull a handle (Left err) = showerr err >> cont False state @@ -89,9 +90,9 @@ startup :: Annex Bool startup = return True {- Cleanup actions. -} -shutdown :: Annex Bool -shutdown = do - saveState +shutdown :: Bool -> Annex Bool +shutdown oneshot = do + saveState oneshot liftIO Git.Command.reap -- zombies from long-running git processes sshCleanup -- ssh connection caching return True diff --git a/Command.hs b/Command.hs index 386efafde9..13ea167bbc 100644 --- a/Command.hs +++ b/Command.hs @@ -8,6 +8,7 @@ module Command ( command, noRepo, + oneShot, withOptions, next, stop, @@ -18,6 +19,7 @@ module Command ( ifAnnexed, notBareRepo, isBareRepo, + numCopies, autoCopies, module ReExported ) where @@ -34,10 +36,15 @@ import Checks as ReExported import Usage as ReExported import Logs.Trust import Config +import Annex.CheckAttr {- Generates a normal command -} command :: String -> String -> [CommandSeek] -> String -> Command -command = Command [] Nothing commonChecks +command = Command [] Nothing commonChecks False + +{- Makes a command run in oneshot mode. -} +oneShot :: Command -> Command +oneShot c = c { cmdoneshot = True } {- Adds a fallback action to a command, that will be run if it's used - outside a git repository. -} @@ -98,17 +105,22 @@ notBareRepo a = do isBareRepo :: Annex Bool isBareRepo = fromRepo Git.repoIsLocalBare +numCopies :: FilePath -> Annex (Maybe Int) +numCopies file = readish <$> checkAttr "annex.numcopies" file + {- Used for commands that have an auto mode that checks the number of known - copies of a key. - - In auto mode, first checks that the number of known - copies of the key is > or < than the numcopies setting, before running - the action. -} -autoCopies :: Key -> (Int -> Int -> Bool) -> Maybe Int -> CommandStart -> CommandStart -autoCopies key vs numcopiesattr a = Annex.getState Annex.auto >>= auto +autoCopies :: FilePath -> Key -> (Int -> Int -> Bool) -> (Maybe Int -> CommandStart) -> CommandStart +autoCopies file key vs a = do + numcopiesattr <- numCopies file + Annex.getState Annex.auto >>= auto numcopiesattr where - auto False = a - auto True = do + auto numcopiesattr False = a numcopiesattr + auto numcopiesattr True = do needed <- getNumCopies numcopiesattr (_, have) <- trustPartition UnTrusted =<< Remote.keyLocations key - if length have `vs` needed then a else stop + if length have `vs` needed then a numcopiesattr else stop diff --git a/Command/Add.hs b/Command/Add.hs index 9410601b8b..28971529a7 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -16,7 +16,6 @@ import qualified Backend import Logs.Location import Annex.Content import Utility.Touch -import Backend def :: [Command] def = [command "add" paramPaths seek "add files to annex"] @@ -28,8 +27,8 @@ seek = [withFilesNotInGit start, withFilesUnlocked start] {- The add subcommand annexes a file, storing it in a backend, and then - moving it into the annex directory and setting up the symlink pointing - to its content. -} -start :: BackendFile -> CommandStart -start p@(_, file) = notBareRepo $ ifAnnexed file fixup add +start :: FilePath -> CommandStart +start file = notBareRepo $ ifAnnexed file fixup add where add = do s <- liftIO $ getSymbolicLinkStatus file @@ -37,7 +36,7 @@ start p@(_, file) = notBareRepo $ ifAnnexed file fixup add then stop else do showStart "add" file - next $ perform p + next $ perform file fixup (key, _) = do -- fixup from an interrupted add; the symlink -- is present but not yet added to git @@ -45,8 +44,10 @@ start p@(_, file) = notBareRepo $ ifAnnexed file fixup add liftIO $ removeFile file next $ next $ cleanup file key =<< inAnnex key -perform :: BackendFile -> CommandPerform -perform (backend, file) = Backend.genKey file backend >>= go +perform :: FilePath -> CommandPerform +perform file = do + backend <- Backend.chooseBackend file + Backend.genKey file backend >>= go where go Nothing = stop go (Just (key, _)) = do diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 46584f0d81..f91d6dd553 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -15,37 +15,55 @@ import qualified Backend import qualified Command.Add import qualified Annex import qualified Backend.URL +import qualified Utility.Url as Url import Annex.Content import Logs.Web +import qualified Option +import Types.Key def :: [Command] -def = [command "addurl" (paramRepeating paramUrl) seek "add urls to annex"] +def = [withOptions [fileOption] $ + command "addurl" (paramRepeating paramUrl) seek "add urls to annex"] + +fileOption :: Option +fileOption = Option.field [] "file" paramFile "specify what file the url is added to" seek :: [CommandSeek] -seek = [withStrings start] +seek = [withField fileOption return $ \f -> + withStrings $ start f] -start :: String -> CommandStart -start s = notBareRepo $ go $ parseURI s +start :: Maybe FilePath -> String -> CommandStart +start optfile s = notBareRepo $ go $ fromMaybe bad $ parseURI s where - go Nothing = error $ "bad url " ++ s - go (Just url) = do - file <- liftIO $ url2file url + bad = fromMaybe (error $ "bad url " ++ s) $ + parseURI $ escapeURIString isUnescapedInURI s + go url = do + let file = fromMaybe (url2file url) optfile showStart "addurl" file next $ perform s file perform :: String -> FilePath -> CommandPerform -perform url file = do - fast <- Annex.getState Annex.fast - if fast then nodownload url file else download url file +perform url file = ifAnnexed file addurl geturl + where + geturl = do + whenM (liftIO $ doesFileExist file) $ + error $ "not overwriting existing " ++ file + fast <- Annex.getState Annex.fast + if fast then nodownload url file else download url file + addurl (key, _backend) = do + unlessM (liftIO $ Url.check url (keySize key)) $ + error $ "failed to verify url: " ++ url + setUrlPresent key url + next $ return True download :: String -> FilePath -> CommandPerform download url file = do showAction $ "downloading " ++ url ++ " " - let dummykey = Backend.URL.fromUrl url + let dummykey = Backend.URL.fromUrl url Nothing tmp <- fromRepo $ gitAnnexTmpLocation dummykey liftIO $ createDirectoryIfMissing True (parentDir tmp) stopUnless (downloadUrl [url] tmp) $ do - [(backend, _)] <- Backend.chooseBackends [file] + backend <- Backend.chooseBackend file k <- Backend.genKey tmp backend case k of Nothing -> stop @@ -56,16 +74,15 @@ download url file = do nodownload :: String -> FilePath -> CommandPerform nodownload url file = do - let key = Backend.URL.fromUrl url + (exists, size) <- liftIO $ Url.exists url + unless exists $ + error $ "unable to access url: " ++ url + let key = Backend.URL.fromUrl url size setUrlPresent key url next $ Command.Add.cleanup file key False -url2file :: URI -> IO FilePath -url2file url = do - whenM (doesFileExist file) $ - error $ "already have this url in " ++ file - return file +url2file :: URI -> FilePath +url2file url = escape $ uriRegName auth ++ uriPath url ++ uriQuery url where - file = escape $ uriRegName auth ++ uriPath url ++ uriQuery url escape = replace "/" "_" . replace "?" "_" auth = fromMaybe (error $ "bad url " ++ show url) $ uriAuthority url diff --git a/Command/ConfigList.hs b/Command/ConfigList.hs index dcf4d15093..fc4ba91022 100644 --- a/Command/ConfigList.hs +++ b/Command/ConfigList.hs @@ -12,7 +12,7 @@ import Command import Annex.UUID def :: [Command] -def = [command "configlist" paramNothing seek +def = [oneShot $ command "configlist" paramNothing seek "outputs relevant git configuration"] seek :: [CommandSeek] diff --git a/Command/Copy.hs b/Command/Copy.hs index 32b83a5262..a8ec225706 100644 --- a/Command/Copy.hs +++ b/Command/Copy.hs @@ -19,10 +19,10 @@ def = [withOptions Command.Move.options $ command "copy" paramPaths seek seek :: [CommandSeek] seek = [withField Command.Move.toOption Remote.byName $ \to -> withField Command.Move.fromOption Remote.byName $ \from -> - withNumCopies $ \n -> whenAnnexed $ start to from n] + withFilesInGit $ whenAnnexed $ start to from] -- A copy is just a move that does not delete the source file. -- However, --auto mode avoids unnecessary copies. -start :: Maybe Remote -> Maybe Remote -> Maybe Int -> FilePath -> (Key, Backend) -> CommandStart -start to from numcopies file (key, backend) = autoCopies key (<) numcopies $ +start :: Maybe Remote -> Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart +start to from file (key, backend) = autoCopies file key (<) $ \_numcopies -> Command.Move.start to from False file (key, backend) diff --git a/Command/Drop.hs b/Command/Drop.hs index b40de00cb2..9eb36a22fe 100644 --- a/Command/Drop.hs +++ b/Command/Drop.hs @@ -26,11 +26,11 @@ fromOption :: Option fromOption = Option.field ['f'] "from" paramRemote "drop content from a remote" seek :: [CommandSeek] -seek = [withField fromOption Remote.byName $ \from -> withNumCopies $ \n -> - whenAnnexed $ start from n] +seek = [withField fromOption Remote.byName $ \from -> + withFilesInGit $ whenAnnexed $ start from] -start :: Maybe Remote -> Maybe Int -> FilePath -> (Key, Backend) -> CommandStart -start from numcopies file (key, _) = autoCopies key (>) numcopies $ do +start :: Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart +start from file (key, _) = autoCopies file key (>) $ \numcopies -> do case from of Nothing -> startLocal file numcopies key Just remote -> do diff --git a/Command/DropKey.hs b/Command/DropKey.hs index aaaa224661..68fdbfdd96 100644 --- a/Command/DropKey.hs +++ b/Command/DropKey.hs @@ -14,7 +14,7 @@ import Logs.Location import Annex.Content def :: [Command] -def = [command "dropkey" (paramRepeating paramKey) seek +def = [oneShot $ command "dropkey" (paramRepeating paramKey) seek "drops annexed content for specified keys"] seek :: [CommandSeek] diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 59af29edb1..94b3601043 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -36,12 +36,13 @@ options = [fromOption] seek :: [CommandSeek] seek = [ withField fromOption Remote.byName $ \from -> - withNumCopies $ \n -> whenAnnexed $ start from n + withFilesInGit $ whenAnnexed $ start from , withBarePresentKeys startBare ] -start :: Maybe Remote -> Maybe Int -> FilePath -> (Key, Backend) -> CommandStart -start from numcopies file (key, backend) = do +start :: Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart +start from file (key, backend) = do + numcopies <- numCopies file showStart "fsck" file case from of Nothing -> next $ perform key file backend numcopies @@ -81,7 +82,7 @@ performRemote key file backend numcopies remote = do t <- fromRepo gitAnnexTmpDir let tmp = t "fsck" ++ show pid ++ "." ++ keyFile key liftIO $ createDirectoryIfMissing True t - let cleanup = liftIO $ catch (removeFile tmp) (const $ return ()) + let cleanup = liftIO $ catchIO (removeFile tmp) (const $ return ()) cleanup cleanup `after` a tmp getfile tmp = do diff --git a/Command/Get.hs b/Command/Get.hs index 5d032e13c4..928ab0f1be 100644 --- a/Command/Get.hs +++ b/Command/Get.hs @@ -19,11 +19,11 @@ def = [withOptions [Command.Move.fromOption] $ command "get" paramPaths seek seek :: [CommandSeek] seek = [withField Command.Move.fromOption Remote.byName $ \from -> - withNumCopies $ \n -> whenAnnexed $ start from n] + withFilesInGit $ whenAnnexed $ start from] -start :: Maybe Remote -> Maybe Int -> FilePath -> (Key, Backend) -> CommandStart -start from numcopies file (key, _) = stopUnless (not <$> inAnnex key) $ - autoCopies key (<) numcopies $ do +start :: Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart +start from file (key, _) = stopUnless (not <$> inAnnex key) $ + autoCopies file key (<) $ \_numcopies -> do case from of Nothing -> go $ perform key Just src -> do diff --git a/Command/InAnnex.hs b/Command/InAnnex.hs index c41f9a92c1..ad0a4d5c7c 100644 --- a/Command/InAnnex.hs +++ b/Command/InAnnex.hs @@ -12,7 +12,7 @@ import Command import Annex.Content def :: [Command] -def = [command "inannex" (paramRepeating paramKey) seek +def = [oneShot $ command "inannex" (paramRepeating paramKey) seek "checks if keys are present in the annex"] seek :: [CommandSeek] diff --git a/Command/Lock.hs b/Command/Lock.hs index 329fd3eff7..b8aedb252b 100644 --- a/Command/Lock.hs +++ b/Command/Lock.hs @@ -10,7 +10,6 @@ module Command.Lock where import Common.Annex import Command import qualified Annex.Queue -import Backend def :: [Command] def = [command "lock" paramPaths seek "undo unlock command"] @@ -18,9 +17,8 @@ def = [command "lock" paramPaths seek "undo unlock command"] seek :: [CommandSeek] seek = [withFilesUnlocked start, withFilesUnlockedToBeCommitted start] -{- Undo unlock -} -start :: BackendFile -> CommandStart -start (_, file) = do +start :: FilePath -> CommandStart +start file = do showStart "lock" file next $ perform file diff --git a/Command/Migrate.hs b/Command/Migrate.hs index f6467463d0..c6b0f086cf 100644 --- a/Command/Migrate.hs +++ b/Command/Migrate.hs @@ -19,12 +19,12 @@ def :: [Command] def = [command "migrate" paramPaths seek "switch data to different backend"] seek :: [CommandSeek] -seek = [withBackendFilesInGit $ \(b, f) -> whenAnnexed (start b) f] +seek = [withFilesInGit $ whenAnnexed start] -start :: Maybe Backend -> FilePath -> (Key, Backend) -> CommandStart -start b file (key, oldbackend) = do +start :: FilePath -> (Key, Backend) -> CommandStart +start file (key, oldbackend) = do exists <- inAnnex key - newbackend <- choosebackend b + newbackend <- choosebackend =<< Backend.chooseBackend file if (newbackend /= oldbackend || upgradableKey key) && exists then do showStart "migrate" file diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index 57bc7ac138..b0328ca190 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -10,7 +10,6 @@ module Command.PreCommit where import Command import qualified Command.Add import qualified Command.Fix -import Backend def :: [Command] def = [command "pre-commit" paramPaths seek "run by git pre-commit hook"] @@ -22,12 +21,12 @@ seek = [ withFilesToBeCommitted $ whenAnnexed Command.Fix.start , withFilesUnlockedToBeCommitted start] -start :: BackendFile -> CommandStart -start p = next $ perform p +start :: FilePath -> CommandStart +start file = next $ perform file -perform :: BackendFile -> CommandPerform -perform pair@(_, file) = do - ok <- doCommand $ Command.Add.start pair +perform :: FilePath -> CommandPerform +perform file = do + ok <- doCommand $ Command.Add.start file if ok then next $ return True else error $ "failed to add " ++ file ++ "; canceling commit" diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs index 5243fa9d4b..9744a56d4a 100644 --- a/Command/RecvKey.hs +++ b/Command/RecvKey.hs @@ -14,7 +14,7 @@ import Annex.Content import Utility.RsyncFile def :: [Command] -def = [command "recvkey" paramKey seek +def = [oneShot $ command "recvkey" paramKey seek "runs rsync in server mode to receive content"] seek :: [CommandSeek] @@ -28,7 +28,7 @@ start key = do if ok then do -- forcibly quit after receiving one key, - -- and shutdown cleanly so queued git commands run - _ <- shutdown + -- and shutdown cleanly + _ <- shutdown True liftIO exitSuccess else liftIO exitFailure diff --git a/Command/SendKey.hs b/Command/SendKey.hs index 7b1cd3ecae..686a31caa7 100644 --- a/Command/SendKey.hs +++ b/Command/SendKey.hs @@ -13,7 +13,7 @@ import Annex.Content import Utility.RsyncFile def :: [Command] -def = [command "sendkey" paramKey seek +def = [oneShot $ command "sendkey" paramKey seek "runs rsync in server mode to send content"] seek :: [CommandSeek] diff --git a/Command/Status.hs b/Command/Status.hs index d2d8d4c077..dfe847bb8e 100644 --- a/Command/Status.hs +++ b/Command/Status.hs @@ -7,7 +7,7 @@ module Command.Status where -import Control.Monad.State +import Control.Monad.State.Strict import qualified Data.Map as M import qualified Data.Set as S import Data.Set (Set) @@ -66,8 +66,8 @@ slow_stats = , bad_data_size , local_annex_keys , local_annex_size - , visible_annex_keys - , visible_annex_size + , known_annex_keys + , known_annex_size , backend_usage ] @@ -113,7 +113,7 @@ supported_remote_types = stat "supported remote types" $ json unwords $ remote_list :: TrustLevel -> String -> Stat remote_list level desc = stat n $ nojson $ lift $ do - us <- M.keys <$> (M.union <$> uuidMap <*> remoteMap) + us <- M.keys <$> (M.union <$> uuidMap <*> remoteMap Remote.name) rs <- fst <$> trustPartition level us s <- prettyPrintUUIDs n rs return $ if null s then "0" else show (length rs) ++ "\n" ++ beginning s @@ -128,12 +128,12 @@ local_annex_keys :: Stat local_annex_keys = stat "local annex keys" $ json show $ S.size <$> cachedKeysPresent -visible_annex_size :: Stat -visible_annex_size = stat "visible annex size" $ json id $ +known_annex_size :: Stat +known_annex_size = stat "known annex size" $ json id $ keySizeSum <$> cachedKeysReferenced -visible_annex_keys :: Stat -visible_annex_keys = stat "visible annex keys" $ json show $ +known_annex_keys :: Stat +known_annex_keys = stat "known annex keys" $ json show $ S.size <$> cachedKeysReferenced tmp_size :: Stat diff --git a/Command/Uninit.hs b/Command/Uninit.hs index cef89a5cf3..ec6d0abf39 100644 --- a/Command/Uninit.hs +++ b/Command/Uninit.hs @@ -57,7 +57,7 @@ cleanup = do mapM_ removeAnnex =<< getKeysPresent liftIO $ removeDirectoryRecursive annexdir -- avoid normal shutdown - saveState + saveState False inRepo $ Git.Command.run "branch" [Param "-D", Param $ show Annex.Branch.name] liftIO exitSuccess diff --git a/Command/Version.hs b/Command/Version.hs index 9fb7fe5bdb..af08d3d709 100644 --- a/Command/Version.hs +++ b/Command/Version.hs @@ -13,7 +13,7 @@ import qualified Build.SysConfig as SysConfig import Annex.Version def :: [Command] -def = [noRepo showPackageVersion $ dontCheck repoExists $ +def = [oneShot $ noRepo showPackageVersion $ dontCheck repoExists $ command "version" paramNothing seek "show version info"] seek :: [CommandSeek] diff --git a/Command/Whereis.hs b/Command/Whereis.hs index 1fbe707992..f62d34642f 100644 --- a/Command/Whereis.hs +++ b/Command/Whereis.hs @@ -7,6 +7,8 @@ module Command.Whereis where +import qualified Data.Map as M + import Common.Annex import Command import Remote @@ -17,24 +19,36 @@ def = [command "whereis" paramPaths seek "lists repositories that have file content"] seek :: [CommandSeek] -seek = [withFilesInGit $ whenAnnexed start] +seek = [withValue (remoteMap id) $ \m -> + withFilesInGit $ whenAnnexed $ start m] -start :: FilePath -> (Key, Backend) -> CommandStart -start file (key, _) = do +start :: (M.Map UUID Remote) -> FilePath -> (Key, Backend) -> CommandStart +start remotemap file (key, _) = do showStart "whereis" file - next $ perform key + next $ perform remotemap key -perform :: Key -> CommandPerform -perform key = do - (untrustedlocations, safelocations) <- trustPartition UnTrusted =<< keyLocations key +perform :: (M.Map UUID Remote) -> Key -> CommandPerform +perform remotemap key = do + locations <- keyLocations key + (untrustedlocations, safelocations) <- trustPartition UnTrusted locations let num = length safelocations showNote $ show num ++ " " ++ copiesplural num pp <- prettyPrintUUIDs "whereis" safelocations unless (null safelocations) $ showLongNote pp pp' <- prettyPrintUUIDs "untrusted" untrustedlocations unless (null untrustedlocations) $ showLongNote $ untrustedheader ++ pp' + forM_ (catMaybes $ map (`M.lookup` remotemap) locations) $ + performRemote key if null safelocations then stop else next $ return True where copiesplural 1 = "copy" copiesplural _ = "copies" untrustedheader = "The following untrusted locations may also have copies:\n" + +performRemote :: Key -> Remote -> Annex () +performRemote key remote = case whereisKey remote of + Nothing -> return () + Just a -> do + ls <- a key + unless (null ls) $ showLongNote $ + unlines $ map (\l -> name remote ++ ": " ++ l) ls diff --git a/Common.hs b/Common.hs index 90895f08e8..cc6cf92527 100644 --- a/Common.hs +++ b/Common.hs @@ -1,8 +1,9 @@ module Common (module X) where import Control.Monad as X hiding (join) +import Control.Monad.IfElse as X import Control.Applicative as X -import Control.Monad.State as X (liftIO) +import Control.Monad.State.Strict as X (liftIO) import Control.Exception.Extensible as X (IOException) import Data.Maybe as X @@ -20,7 +21,7 @@ import System.Posix.Process as X hiding (executeFile) import System.Exit as X import Utility.Misc as X -import Utility.Conditional as X +import Utility.Exception as X import Utility.SafeCommand as X import Utility.Path as X import Utility.Directory as X diff --git a/Config.hs b/Config.hs index 83a84a1fe2..349ddf67f4 100644 --- a/Config.hs +++ b/Config.hs @@ -40,7 +40,7 @@ remoteConfig r key = "remote." ++ fromMaybe "" (Git.remoteName r) ++ ".annex-" + remoteCost :: Git.Repo -> Int -> Annex Int remoteCost r def = do cmd <- getConfig r "cost-command" "" - (fromMaybe def . readMaybe) <$> + (fromMaybe def . readish) <$> if not $ null cmd then liftIO $ snd <$> pipeFrom "sh" ["-c", cmd] else getConfig r "cost" "" @@ -78,7 +78,7 @@ getNumCopies v = perhaps (use v) =<< Annex.getState Annex.forcenumcopies where use (Just n) = return n use Nothing = perhaps (return 1) =<< - readMaybe <$> fromRepo (Git.Config.get config "1") + readish <$> fromRepo (Git.Config.get config "1") perhaps fallback = maybe fallback (return . id) config = "annex.numcopies" diff --git a/Git/CatFile.hs b/Git/CatFile.hs index 16f0b11b95..2a2eb5e6f7 100644 --- a/Git/CatFile.hs +++ b/Git/CatFile.hs @@ -13,7 +13,6 @@ module Git.CatFile ( catObject ) where -import Control.Monad.State import System.Cmd.Utils import System.IO import qualified Data.ByteString.Char8 as S diff --git a/Git/CheckAttr.hs b/Git/CheckAttr.hs index d794ea9f71..ea6d4b0689 100644 --- a/Git/CheckAttr.hs +++ b/Git/CheckAttr.hs @@ -1,40 +1,56 @@ {- git check-attr interface - - - Copyright 2010, 2011 Joey Hess + - Copyright 2010-2012 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} module Git.CheckAttr where -import System.Exit - import Common import Git import Git.Command -import qualified Git.Filename import qualified Git.Version -{- Efficiently looks up a gitattributes value for each file in a list. -} -lookup :: String -> [FilePath] -> Repo -> IO [(FilePath, String)] -lookup attr files repo = do - oldgit <- Git.Version.older "1.7.7" - cwd <- getCurrentDirectory - (_, fromh, toh) <- hPipeBoth "git" (toCommand params) - _ <- forkProcess $ do - hClose fromh - hPutStr toh $ join "\0" $ input cwd oldgit - hClose toh - exitSuccess - hClose toh - output cwd oldgit . lines <$> hGetContents fromh - where - params = gitCommandLine - [ Param "check-attr" - , Param attr - , Params "-z --stdin" - ] repo +type CheckAttrHandle = (PipeHandle, Handle, Handle, [Attr], String) +type Attr = String + +{- Starts git check-attr running to look up the specified gitattributes + - values and return a handle. -} +checkAttrStart :: [Attr] -> Repo -> IO CheckAttrHandle +checkAttrStart attrs repo = do + cwd <- getCurrentDirectory + (pid, from, to) <- hPipeBoth "git" $ toCommand $ + gitCommandLine params repo + return (pid, from, to, attrs, cwd) + where + params = + [ Param "check-attr" ] + ++ map Param attrs ++ + [ Params "-z --stdin" ] + +{- Stops git check-attr. -} +checkAttrStop :: CheckAttrHandle -> IO () +checkAttrStop (pid, from, to, _, _) = do + hClose to + hClose from + forceSuccess pid + +{- Gets an attribute of a file. -} +checkAttr :: CheckAttrHandle -> Attr -> FilePath -> IO String +checkAttr (_, from, to, attrs, cwd) want file = do + oldgit <- Git.Version.older "1.7.7" + hPutStr to $ file' oldgit ++ "\0" + hFlush to + pairs <- forM attrs $ \attr -> do + l <- hGetLine from + return (attr, attrvalue attr l) + let vals = map snd $ filter (\(attr, _) -> attr == want) pairs + case vals of + [v] -> return v + _ -> error $ "unable to determine " ++ want ++ " attribute of " ++ file + where {- Before git 1.7.7, git check-attr worked best with - absolute filenames; using them worked around some bugs - with relative filenames. @@ -42,25 +58,10 @@ lookup attr files repo = do - With newer git, git check-attr chokes on some absolute - filenames, and the bugs that necessitated them were fixed, - so use relative filenames. -} - input cwd oldgit - | oldgit = map (absPathFrom cwd) files - | otherwise = map (relPathDirToFile cwd . absPathFrom cwd) files - output cwd oldgit - | oldgit = map (torel cwd . topair) - | otherwise = map topair - - topair l = (Git.Filename.decode file, value) - where - file = join sep $ beginning bits - value = end bits !! 0 + file' oldgit + | oldgit = absPathFrom cwd file + | otherwise = relPathDirToFile cwd $ absPathFrom cwd file + attrvalue attr l = end bits !! 0 + where bits = split sep l sep = ": " ++ attr ++ ": " - - torel cwd (file, value) = (relfile, value) - where - relfile - | startswith cwd' file = drop (length cwd') file - | otherwise = relPathDirToFile top' file - top = workTree repo - cwd' = cwd ++ "/" - top' = top ++ "/" diff --git a/Git/Command.hs b/Git/Command.hs index 2350bb0ca3..ec701c1f09 100644 --- a/Git/Command.hs +++ b/Git/Command.hs @@ -30,8 +30,8 @@ runBool subcommand params repo = assertLocal repo $ {- Runs git in the specified repo, throwing an error if it fails. -} run :: String -> [CommandParam] -> Repo -> IO () run subcommand params repo = assertLocal repo $ - runBool subcommand params repo - >>! error $ "git " ++ show params ++ " failed" + unlessM (runBool subcommand params repo) $ + error $ "git " ++ subcommand ++ " " ++ show params ++ " failed" {- Runs a git subcommand and returns its output, lazily. - diff --git a/Git/Construct.hs b/Git/Construct.hs index bfb16164ff..ef6094a21d 100644 --- a/Git/Construct.hs +++ b/Git/Construct.hs @@ -9,6 +9,7 @@ module Git.Construct ( fromCurrent, fromCwd, fromAbsPath, + fromPath, fromUrl, fromUnknown, localToUrl, diff --git a/Git/HashObject.hs b/Git/HashObject.hs index f5e6d50cdf..35fc26e7a9 100644 --- a/Git/HashObject.hs +++ b/Git/HashObject.hs @@ -1,6 +1,6 @@ {- git hash-object interface - - - Copyright 2011 Joey Hess + - Copyright 2011-2012 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -11,22 +11,31 @@ import Common import Git import Git.Command -{- Injects a set of files into git, returning the shas of the objects - - and an IO action to call ones the the shas have been used. -} -hashFiles :: [FilePath] -> Repo -> IO ([Sha], IO ()) -hashFiles paths repo = do - (pid, fromh, toh) <- hPipeBoth "git" $ toCommand $ git_hash_object repo - _ <- forkProcess (feeder toh) - hClose toh - shas <- map Ref . lines <$> hGetContentsStrict fromh - return (shas, ender fromh pid) +type HashObjectHandle = (PipeHandle, Handle, Handle) + +{- Starts git hash-object and returns a handle. -} +hashObjectStart :: Repo -> IO HashObjectHandle +hashObjectStart repo = do + r@(_, _, toh) <- hPipeBoth "git" $ + toCommand $ gitCommandLine params repo + return r where - git_hash_object = gitCommandLine - [Param "hash-object", Param "-w", Param "--stdin-paths"] - feeder toh = do - hPutStr toh $ unlines paths - hClose toh - exitSuccess - ender fromh pid = do - hClose fromh - forceSuccess pid + params = + [ Param "hash-object" + , Param "-w" + , Param "--stdin-paths" + ] + +{- Stops git hash-object. -} +hashObjectStop :: HashObjectHandle -> IO () +hashObjectStop (pid, from, to) = do + hClose to + hClose from + forceSuccess pid + +{- Injects a file into git, returning the shas of the objects. -} +hashFile :: HashObjectHandle -> FilePath -> IO Sha +hashFile (_, from, to) file = do + hPutStrLn to file + hFlush to + Ref <$> hGetLine from diff --git a/Git/LsFiles.hs b/Git/LsFiles.hs index 0de86383d3..201d76d1d4 100644 --- a/Git/LsFiles.hs +++ b/Git/LsFiles.hs @@ -65,7 +65,13 @@ typeChanged :: [FilePath] -> Repo -> IO [FilePath] typeChanged = typeChanged' [] typeChanged' :: [CommandParam] -> [FilePath] -> Repo -> IO [FilePath] -typeChanged' ps l = pipeNullSplit $ prefix ++ ps ++ suffix +typeChanged' ps l repo = do + fs <- pipeNullSplit (prefix ++ ps ++ suffix) repo + -- git diff returns filenames relative to the top of the git repo; + -- convert to filenames relative to the cwd, like git ls-files. + let top = workTree repo + cwd <- getCurrentDirectory + return $ map (\f -> relPathDirToFile cwd $ top f) fs where prefix = [Params "diff --name-only --diff-filter=T -z"] suffix = Param "--" : map File l diff --git a/Git/Queue.hs b/Git/Queue.hs index 25c5b073c7..58704f4d01 100644 --- a/Git/Queue.hs +++ b/Git/Queue.hs @@ -18,8 +18,8 @@ import qualified Data.Map as M import System.IO import System.Cmd.Utils import Data.String.Utils -import Utility.SafeCommand +import Utility.SafeCommand import Common import Git import Git.Command diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs index 4b335e47b1..650e3a5034 100644 --- a/Git/UnionMerge.hs +++ b/Git/UnionMerge.hs @@ -83,13 +83,14 @@ merge_trees (Ref x) (Ref y) h = calc_merge h $ "diff-tree":diff_opts ++ [x, y] {- For merging a single tree into the index. -} merge_tree_index :: Ref -> CatFileHandle -> Repo -> Streamer -merge_tree_index (Ref x) h = calc_merge h $ "diff-index":diff_opts ++ ["--cached", x] +merge_tree_index (Ref x) h = calc_merge h $ + "diff-index" : diff_opts ++ ["--cached", x] diff_opts :: [String] diff_opts = ["--raw", "-z", "-r", "--no-renames", "-l0"] {- Calculates how to perform a merge, using git to get a raw diff, - - and returning a list suitable for update_index. -} + - and generating update-index input. -} calc_merge :: CatFileHandle -> [String] -> Repo -> Streamer calc_merge ch differ repo streamer = gendiff >>= go where @@ -100,7 +101,7 @@ calc_merge ch differ repo streamer = gendiff >>= go go (_:[]) = error "calc_merge parse error" {- Given an info line from a git raw diff, and the filename, generates - - a line suitable for update_index that union merges the two sides of the + - a line suitable for update-index that union merges the two sides of the - diff. -} mergeFile :: String -> FilePath -> CatFileHandle -> Repo -> IO (Maybe String) mergeFile info file h repo = case filter (/= nullSha) [Ref asha, Ref bsha] of diff --git a/Git/Url.hs b/Git/Url.hs index 6a893d92fe..21b69dc7c0 100644 --- a/Git/Url.hs +++ b/Git/Url.hs @@ -45,7 +45,7 @@ port :: Repo -> Maybe Integer port r = case authpart uriPort r of ":" -> Nothing - (':':p) -> readMaybe p + (':':p) -> readish p _ -> Nothing {- Hostname of an URL repo, including any username (ie, "user@host") -} diff --git a/GitAnnex.hs b/GitAnnex.hs index bc3541676b..4af10a9ce4 100644 --- a/GitAnnex.hs +++ b/GitAnnex.hs @@ -119,7 +119,7 @@ options = Option.common ++ "skip files not using a key-value backend" ] ++ Option.matcher where - setnumcopies v = Annex.changeState $ \s -> s {Annex.forcenumcopies = readMaybe v } + setnumcopies v = Annex.changeState $ \s -> s {Annex.forcenumcopies = readish v } setgitconfig :: String -> Annex () setgitconfig v = do newg <- inRepo $ Git.Config.store v diff --git a/Limit.hs b/Limit.hs index 128ea0a27b..0db418e6c6 100644 --- a/Limit.hs +++ b/Limit.hs @@ -84,7 +84,7 @@ addIn name = addLimit $ check $ if name == "." then inAnnex else inremote - of copies. -} addCopies :: String -> Annex () addCopies num = - case readMaybe num :: Maybe Int of + case readish num :: Maybe Int of Nothing -> error "bad number for --copies" Just n -> addLimit $ check n where diff --git a/Remote.hs b/Remote.hs index ffb53446b4..16e1a6a1fb 100644 --- a/Remote.hs +++ b/Remote.hs @@ -15,6 +15,7 @@ module Remote ( removeKey, hasKey, hasKeyCheap, + whereisKey, remoteTypes, remoteList, @@ -48,16 +49,16 @@ import Logs.Trust import Logs.Location import Remote.List -{- Map of UUIDs of Remotes and their names. -} -remoteMap :: Annex (M.Map UUID String) -remoteMap = M.fromList . map (\r -> (uuid r, name r)) . +{- Map from UUIDs of Remotes to a calculated value. -} +remoteMap :: (Remote -> a) -> Annex (M.Map UUID a) +remoteMap c = M.fromList . map (\r -> (uuid r, c r)) . filter (\r -> uuid r /= NoUUID) <$> remoteList {- Map of UUIDs and their descriptions. - The names of Remotes are added to suppliment any description that has - been set for a repository. -} uuidDescriptions :: Annex (M.Map UUID String) -uuidDescriptions = M.unionWith addName <$> uuidMap <*> remoteMap +uuidDescriptions = M.unionWith addName <$> uuidMap <*> remoteMap name addName :: String -> String -> String addName desc n @@ -66,7 +67,7 @@ addName desc n | otherwise = n ++ " (" ++ desc ++ ")" {- When a name is specified, looks up the remote matching that name. - - (Or it can be a UUID.) Only finds currently configured git remotes. -} + - Only finds currently configured git remotes. -} byName :: Maybe String -> Annex (Maybe Remote) byName Nothing = return Nothing byName (Just n) = do diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 7329167dae..a4f43a3f3e 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -8,7 +8,6 @@ module Remote.Bup (remote) where import qualified Data.ByteString.Lazy.Char8 as L -import System.IO.Error import qualified Data.Map as M import System.Process @@ -54,6 +53,7 @@ gen r u c = do removeKey = remove, hasKey = checkPresent r bupr', hasKeyCheap = bupLocal buprepo, + whereisKey = Nothing, config = c, repo = r, remotetype = remote @@ -69,7 +69,7 @@ bupSetup u c = do -- bup init will create the repository. -- (If the repository already exists, bup init again appears safe.) showAction "bup init" - bup "init" buprepo [] >>! error "bup init failed" + unlessM (bup "init" buprepo []) $ error "bup init failed" storeBupUUID u buprepo @@ -167,9 +167,9 @@ storeBupUUID u buprepo = do if Git.repoIsUrl r then do showAction "storing uuid" - onBupRemote r boolSystem "git" - [Params $ "config annex.uuid " ++ v] - >>! error "ssh failed" + unlessM (onBupRemote r boolSystem "git" + [Params $ "config annex.uuid " ++ v]) $ + error "ssh failed" else liftIO $ do r' <- Git.Config.read r let olduuid = Git.Config.get "annex.uuid" "" r' @@ -200,7 +200,7 @@ getBupUUID :: Git.Repo -> UUID -> Annex (UUID, Git.Repo) getBupUUID r u | Git.repoIsUrl r = return (u, r) | otherwise = liftIO $ do - ret <- try $ Git.Config.read r + ret <- tryIO $ Git.Config.read r case ret of Right r' -> return (toUUID $ Git.Config.get "annex.uuid" "" r', r') Left _ -> return (NoUUID, r) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 52f4263409..ee2a0d75aa 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -45,6 +45,7 @@ gen r u c = do removeKey = remove dir, hasKey = checkPresent dir, hasKeyCheap = True, + whereisKey = Nothing, config = Nothing, repo = r, remotetype = remote @@ -55,8 +56,8 @@ directorySetup u c = do -- verify configuration is sane let dir = fromMaybe (error "Specify directory=") $ M.lookup "directory" c - liftIO $ doesDirectoryExist dir - >>! error $ "Directory does not exist: " ++ dir + liftIO $ unlessM (doesDirectoryExist dir) $ + error $ "Directory does not exist: " ++ dir c' <- encryptionSetup c -- The directory is stored in git config, not in this remote's diff --git a/Remote/Git.hs b/Remote/Git.hs index efe1829610..c07ae3237b 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -20,6 +20,7 @@ import qualified Git.Command import qualified Git.Config import qualified Git.Construct import qualified Annex +import Logs.Presence import Annex.UUID import qualified Annex.Content import qualified Annex.BranchState @@ -27,6 +28,7 @@ import qualified Utility.Url as Url import Utility.TempFile import Config import Init +import Types.Key remote :: RemoteType remote = RemoteType { @@ -79,6 +81,7 @@ gen r u _ = do removeKey = dropKey r', hasKey = inAnnex r', hasKeyCheap = cheap, + whereisKey = Nothing, config = Nothing, repo = r', remotetype = remote @@ -142,7 +145,8 @@ inAnnex r key where go e [] = return $ Left e go _ (u:us) = do - res <- catchMsgIO $ Url.exists u + res <- catchMsgIO $ + Url.check u (keySize key) case res of Left e -> go e us v -> return v @@ -192,6 +196,14 @@ keyUrls r key = map tourl (annexLocations key) dropKey :: Git.Repo -> Key -> Annex Bool dropKey r key + | not $ Git.repoIsUrl r = liftIO $ onLocal r $ do + ensureInitialized + whenM (Annex.Content.inAnnex key) $ do + Annex.Content.lockContent key $ + Annex.Content.removeAnnex key + Annex.Content.logStatus key InfoMissing + Annex.Content.saveState True + return True | Git.repoIsHttp r = error "dropping from http repo not supported" | otherwise = onRemote r (boolSystem, False) "dropkey" [ Params "--quiet --force" @@ -230,7 +242,7 @@ copyToRemote r key -- run copy from perspective of remote liftIO $ onLocal r $ do ensureInitialized - Annex.Content.saveState `after` + Annex.Content.saveState True `after` Annex.Content.getViaTmp key (rsyncOrCopyFile params keysrc) | Git.repoIsSsh r = do diff --git a/Remote/Hook.hs b/Remote/Hook.hs index a08c4011ef..c7d710f196 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -45,6 +45,7 @@ gen r u c = do removeKey = remove hooktype, hasKey = checkPresent r hooktype, hasKeyCheap = False, + whereisKey = Nothing, config = Nothing, repo = r, remotetype = remote diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 8de6ba6a74..54fb890cae 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -52,6 +52,7 @@ gen r u c = do removeKey = remove o, hasKey = checkPresent r o, hasKeyCheap = False, + whereisKey = Nothing, config = Nothing, repo = r, remotetype = remote @@ -181,8 +182,8 @@ withRsyncScratchDir a = do liftIO $ createDirectoryIfMissing True tmp nuke tmp `after` a tmp where - nuke d = liftIO $ - doesDirectoryExist d >>? removeDirectoryRecursive d + nuke d = liftIO $ whenM (doesDirectoryExist d) $ + removeDirectoryRecursive d rsyncRemote :: RsyncOpts -> [CommandParam] -> Annex Bool rsyncRemote o params = do diff --git a/Remote/Web.hs b/Remote/Web.hs index 49c3f43f3a..81e6ca321c 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -15,6 +15,7 @@ import Annex.Content import Config import Logs.Web import qualified Utility.Url as Url +import Types.Key remote :: RemoteType remote = RemoteType { @@ -44,6 +45,7 @@ gen r _ _ = removeKey = dropKey, hasKey = checkKey, hasKeyCheap = False, + whereisKey = Just getUrls, config = Nothing, repo = r, remotetype = remote @@ -77,8 +79,8 @@ checkKey key = do us <- getUrls key if null us then return $ Right False - else return . Right =<< checkKey' us -checkKey' :: [URLString] -> Annex Bool -checkKey' us = untilTrue us $ \u -> do + else return . Right =<< checkKey' key us +checkKey' :: Key -> [URLString] -> Annex Bool +checkKey' key us = untilTrue us $ \u -> do showAction $ "checking " ++ u - liftIO $ Url.exists u + liftIO $ Url.check u (keySize key) diff --git a/Seek.hs b/Seek.hs index 8e935c90cd..6f56f30f4a 100644 --- a/Seek.hs +++ b/Seek.hs @@ -14,11 +14,9 @@ module Seek where import Common.Annex import Types.Command import Types.Key -import Backend import qualified Annex import qualified Git import qualified Git.LsFiles as LsFiles -import qualified Git.CheckAttr import qualified Limit import qualified Option @@ -28,26 +26,12 @@ seekHelper a params = inRepo $ \g -> runPreserveOrder (`a` g) params withFilesInGit :: (FilePath -> CommandStart) -> CommandSeek withFilesInGit a params = prepFiltered a $ seekHelper LsFiles.inRepo params -withAttrFilesInGit :: String -> ((FilePath, String) -> CommandStart) -> CommandSeek -withAttrFilesInGit attr a params = do - files <- seekHelper LsFiles.inRepo params - prepFilteredGen a fst $ inRepo $ Git.CheckAttr.lookup attr files - -withNumCopies :: (Maybe Int -> FilePath -> CommandStart) -> CommandSeek -withNumCopies a params = withAttrFilesInGit "annex.numcopies" go params - where - go (file, v) = a (readMaybe v) file - -withBackendFilesInGit :: (BackendFile -> CommandStart) -> CommandSeek -withBackendFilesInGit a params = - prepBackendPairs a =<< seekHelper LsFiles.inRepo params - -withFilesNotInGit :: (BackendFile -> CommandStart) -> CommandSeek +withFilesNotInGit :: (FilePath -> CommandStart) -> CommandSeek withFilesNotInGit a params = do {- dotfiles are not acted on unless explicitly listed -} files <- filter (not . dotfile) <$> seek ps dotfiles <- if null dotps then return [] else seek dotps - prepBackendPairs a $ preserveOrder params (files++dotfiles) + prepFiltered a $ return $ preserveOrder params (files++dotfiles) where (dotps, ps) = partition dotfile params seek l = do @@ -65,20 +49,18 @@ withFilesToBeCommitted :: (String -> CommandStart) -> CommandSeek withFilesToBeCommitted a params = prepFiltered a $ seekHelper LsFiles.stagedNotDeleted params -withFilesUnlocked :: (BackendFile -> CommandStart) -> CommandSeek +withFilesUnlocked :: (FilePath -> CommandStart) -> CommandSeek withFilesUnlocked = withFilesUnlocked' LsFiles.typeChanged -withFilesUnlockedToBeCommitted :: (BackendFile -> CommandStart) -> CommandSeek +withFilesUnlockedToBeCommitted :: (FilePath -> CommandStart) -> CommandSeek withFilesUnlockedToBeCommitted = withFilesUnlocked' LsFiles.typeChangedStaged -withFilesUnlocked' :: ([FilePath] -> Git.Repo -> IO [FilePath]) -> (BackendFile -> CommandStart) -> CommandSeek +withFilesUnlocked' :: ([FilePath] -> Git.Repo -> IO [FilePath]) -> (FilePath -> CommandStart) -> CommandSeek withFilesUnlocked' typechanged a params = do -- unlocked files have changed type from a symlink to a regular file - top <- fromRepo Git.workTree typechangedfiles <- seekHelper typechanged params - unlockedfiles <- liftIO $ filterM notSymlink $ - map (\f -> top ++ "/" ++ f) typechangedfiles - prepBackendPairs a unlockedfiles + let unlockedfiles = liftIO $ filterM notSymlink typechangedfiles + prepFiltered a unlockedfiles withKeys :: (Key -> CommandStart) -> CommandSeek withKeys a params = return $ map (a . parse) params @@ -107,20 +89,13 @@ withNothing _ _ = error "This command takes no parameters." prepFiltered :: (FilePath -> CommandStart) -> Annex [FilePath] -> Annex [CommandStart] -prepFiltered a = prepFilteredGen a id - -prepBackendPairs :: (BackendFile -> CommandStart) -> CommandSeek -prepBackendPairs a fs = prepFilteredGen a snd (chooseBackends fs) - -prepFilteredGen :: (b -> CommandStart) -> (b -> FilePath) -> Annex [b] -> Annex [CommandStart] -prepFilteredGen a d fs = do +prepFiltered a fs = do matcher <- Limit.getMatcher map (proc matcher) <$> fs where - proc matcher v = do - let f = d v + proc matcher f = do ok <- matcher f - if ok then a v else return Nothing + if ok then a f else return Nothing notSymlink :: FilePath -> IO Bool notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f diff --git a/Types/Command.hs b/Types/Command.hs index 1233df2cd9..6dbcf48d16 100644 --- a/Types/Command.hs +++ b/Types/Command.hs @@ -36,6 +36,7 @@ data Command = Command { cmdoptions :: [Option] -- command-specific options , cmdnorepo :: Maybe (IO ()) -- an action to run when not in a repo , cmdcheck :: [CommandCheck] -- check stage + , cmdoneshot :: Bool -- don't save state after running , cmdname :: String , cmdparamdesc :: String -- description of params for usage , cmdseek :: [CommandSeek] -- seek stage diff --git a/Types/Key.hs b/Types/Key.hs index 165f814d4b..f258f5c4ce 100644 --- a/Types/Key.hs +++ b/Types/Key.hs @@ -69,8 +69,8 @@ readKey s = if key == Just stubKey then Nothing else key findfields _ v = v addbackend k v = Just k { keyBackendName = v } - addfield 's' k v = Just k { keySize = readMaybe v } - addfield 'm' k v = Just k { keyMtime = readMaybe v } + addfield 's' k v = Just k { keySize = readish v } + addfield 'm' k v = Just k { keyMtime = readish v } addfield _ _ _ = Nothing prop_idempotent_key_read_show :: Key -> Bool diff --git a/Types/Remote.hs b/Types/Remote.hs index 003dd5342a..9bac2ca0f8 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -55,6 +55,8 @@ data RemoteA a = Remote { -- Some remotes can check hasKey without an expensive network -- operation. hasKeyCheap :: Bool, + -- Some remotes can provide additional details for whereis. + whereisKey :: Maybe (Key -> a [String]), -- a Remote can have a persistent configuration store config :: Maybe RemoteConfig, -- git configuration for the remote diff --git a/Upgrade/V0.hs b/Upgrade/V0.hs index c5310c641a..c439c7caa2 100644 --- a/Upgrade/V0.hs +++ b/Upgrade/V0.hs @@ -7,8 +7,6 @@ module Upgrade.V0 where -import System.IO.Error (try) - import Common.Annex import Annex.Content import qualified Upgrade.V1 @@ -47,7 +45,7 @@ getKeysPresent0 dir = do return $ map fileKey0 files where present d = do - result <- try $ + result <- tryIO $ getFileStatus $ dir ++ "/" ++ takeFileName d case result of Right s -> return $ isRegularFile s diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs index add50fcf3a..ca2bff6617 100644 --- a/Upgrade/V1.hs +++ b/Upgrade/V1.hs @@ -7,7 +7,6 @@ module Upgrade.V1 where -import System.IO.Error (try) import System.Posix.Types import Data.Char @@ -183,7 +182,7 @@ readLog1 file = catchDefaultIO (parseLog <$> readFileStrict file) [] lookupFile1 :: FilePath -> Annex (Maybe (Key, Backend)) lookupFile1 file = do - tl <- liftIO $ try getsymlink + tl <- liftIO $ tryIO getsymlink case tl of Left _ -> return Nothing Right l -> makekey l @@ -216,7 +215,7 @@ getKeyFilesPresent1' dir = do liftIO $ filterM present files where present f = do - result <- try $ getFileStatus f + result <- tryIO $ getFileStatus f case result of Right s -> return $ isRegularFile s Left _ -> return False diff --git a/Upgrade/V2.hs b/Upgrade/V2.hs index ffc2f60022..c57b0bf685 100644 --- a/Upgrade/V2.hs +++ b/Upgrade/V2.hs @@ -50,7 +50,7 @@ upgrade = do mapM_ (\(k, f) -> inject f $ logFile k) =<< locationLogs mapM_ (\f -> inject f f) =<< logFiles old - saveState + saveState False showProgress when e $ do diff --git a/Usage.hs b/Usage.hs index 34c344b183..a33f6f311b 100644 --- a/Usage.hs +++ b/Usage.hs @@ -76,6 +76,8 @@ paramDate :: String paramDate = "DATE" paramFormat :: String paramFormat = "FORMAT" +paramFile :: String +paramFile = "FILE" paramKeyValue :: String paramKeyValue = "K=V" paramNothing :: String diff --git a/Utility/Conditional.hs b/Utility/Conditional.hs deleted file mode 100644 index 85e39ec64c..0000000000 --- a/Utility/Conditional.hs +++ /dev/null @@ -1,26 +0,0 @@ -{- monadic conditional operators - - - - Copyright 2011 Joey Hess - - - - Licensed under the GNU GPL version 3 or higher. - -} - -module Utility.Conditional where - -import Control.Monad (when, unless) - -whenM :: Monad m => m Bool -> m () -> m () -whenM c a = c >>= flip when a - -unlessM :: Monad m => m Bool -> m () -> m () -unlessM c a = c >>= flip unless a - -(>>?) :: Monad m => m Bool -> m () -> m () -(>>?) = whenM - -(>>!) :: Monad m => m Bool -> m () -> m () -(>>!) = unlessM - --- low fixity allows eg, foo bar >>! error $ "failed " ++ meep -infixr 0 >>? -infixr 0 >>! diff --git a/Utility/CopyFile.hs b/Utility/CopyFile.hs index 5d6855bf01..c42506485b 100644 --- a/Utility/CopyFile.hs +++ b/Utility/CopyFile.hs @@ -8,8 +8,8 @@ module Utility.CopyFile (copyFileExternal) where import System.Directory (doesFileExist, removeFile) +import Control.Monad.IfElse -import Utility.Conditional import Utility.SafeCommand import qualified Build.SysConfig as SysConfig diff --git a/Utility/Directory.hs b/Utility/Directory.hs index 249ed69356..e7b7c442b2 100644 --- a/Utility/Directory.hs +++ b/Utility/Directory.hs @@ -12,15 +12,16 @@ import System.Posix.Files import System.Directory import Control.Exception (throw) import Control.Monad +import Control.Monad.IfElse import Utility.SafeCommand -import Utility.Conditional import Utility.TempFile +import Utility.Exception {- Moves one filename to another. - First tries a rename, but falls back to moving across devices if needed. -} moveFile :: FilePath -> FilePath -> IO () -moveFile src dest = try (rename src dest) >>= onrename +moveFile src dest = tryIO (rename src dest) >>= onrename where onrename (Right _) = return () onrename (Left e) @@ -40,11 +41,10 @@ moveFile src dest = try (rename src dest) >>= onrename Param src, Param tmp] unless ok $ do -- delete any partial - _ <- try $ - removeFile tmp + _ <- tryIO $ removeFile tmp rethrow isdir f = do - r <- try (getFileStatus f) + r <- tryIO $ getFileStatus f case r of (Left _) -> return False (Right s) -> return $ isDirectory s diff --git a/Utility/Exception.hs b/Utility/Exception.hs new file mode 100644 index 0000000000..7b6c9c999f --- /dev/null +++ b/Utility/Exception.hs @@ -0,0 +1,39 @@ +{- Simple IO exception handling + - + - Copyright 2011-2012 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.Exception where + +import Prelude hiding (catch) +import Control.Exception +import Control.Applicative + +{- Catches IO errors and returns a Bool -} +catchBoolIO :: IO Bool -> IO Bool +catchBoolIO a = catchDefaultIO a False + +{- Catches IO errors and returns a Maybe -} +catchMaybeIO :: IO a -> IO (Maybe a) +catchMaybeIO a = catchDefaultIO (Just <$> a) Nothing + +{- Catches IO errors and returns a default value. -} +catchDefaultIO :: IO a -> a -> IO a +catchDefaultIO a def = catchIO a (const $ return def) + +{- Catches IO errors and returns the error message. -} +catchMsgIO :: IO a -> IO (Either String a) +catchMsgIO a = dispatch <$> tryIO a + where + dispatch (Left e) = Left $ show e + dispatch (Right v) = Right v + +{- catch specialized for IO errors only -} +catchIO :: IO a -> (IOException -> IO a) -> IO a +catchIO = catch + +{- try specialized for IO errors only -} +tryIO :: IO a -> IO (Either IOException a) +tryIO = try diff --git a/Utility/Format.hs b/Utility/Format.hs index d8b7e45493..79e94ae963 100644 --- a/Utility/Format.hs +++ b/Utility/Format.hs @@ -88,7 +88,7 @@ gen = filter (not . empty) . fuse [] . scan [] . decode_c | c == '}' = foundvar f var (readjustify $ reverse p) cs | otherwise = inpad (c:p) f var cs inpad p f var [] = Const (novar $ p++";"++var) : f - readjustify = getjustify . fromMaybe 0 . readMaybe + readjustify = getjustify . fromMaybe 0 . readish getjustify i | i == 0 = UnJustified | i < 0 = LeftJustified (-1 * i) diff --git a/Utility/Misc.hs b/Utility/Misc.hs index c9bfcb953a..3ac5ca5c0b 100644 --- a/Utility/Misc.hs +++ b/Utility/Misc.hs @@ -8,9 +8,7 @@ module Utility.Misc where import System.IO -import System.IO.Error (try) import Control.Monad -import Control.Applicative {- A version of hgetContents that is not lazy. Ensures file is - all read before it gets closed. -} @@ -37,22 +35,3 @@ separate c l = unbreak $ break c l {- Breaks out the first line. -} firstLine :: String-> String firstLine = takeWhile (/= '\n') - -{- Catches IO errors and returns a Bool -} -catchBoolIO :: IO Bool -> IO Bool -catchBoolIO a = catchDefaultIO a False - -{- Catches IO errors and returns a Maybe -} -catchMaybeIO :: IO a -> IO (Maybe a) -catchMaybeIO a = catchDefaultIO (Just <$> a) Nothing - -{- Catches IO errors and returns a default value. -} -catchDefaultIO :: IO a -> a -> IO a -catchDefaultIO a def = catch a (const $ return def) - -{- Catches IO errors and returns the error message. -} -catchMsgIO :: IO a -> IO (Either String a) -catchMsgIO a = dispatch <$> try a - where - dispatch (Left e) = Left $ show e - dispatch (Right v) = Right v diff --git a/Utility/PartialPrelude.hs b/Utility/PartialPrelude.hs index 507fc6252b..6efa093fd3 100644 --- a/Utility/PartialPrelude.hs +++ b/Utility/PartialPrelude.hs @@ -7,8 +7,10 @@ module Utility.PartialPrelude where +import qualified Data.Maybe + {- read should be avoided, as it throws an error - - Instead, use: readMaybe -} + - Instead, use: readish -} read :: Read a => String -> a read = Prelude.read @@ -36,16 +38,18 @@ last = Prelude.last - - Ignores leading/trailing whitespace, and throws away any trailing - text after the part that can be read. + - + - readMaybe is available in Text.Read in new versions of GHC, + - but that one requires the entire string to be consumed. -} -readMaybe :: Read a => String -> Maybe a -readMaybe s = case reads s of +readish :: Read a => String -> Maybe a +readish s = case reads s of ((x,_):_) -> Just x _ -> Nothing {- Like head but Nothing on empty list. -} headMaybe :: [a] -> Maybe a -headMaybe [] = Nothing -headMaybe v = Just $ Prelude.head v +headMaybe = Data.Maybe.listToMaybe {- Like last but Nothing on empty list. -} lastMaybe :: [a] -> Maybe a diff --git a/Utility/Path.hs b/Utility/Path.hs index 9f4fe29277..ed5e59cb5f 100644 --- a/Utility/Path.hs +++ b/Utility/Path.hs @@ -47,7 +47,10 @@ dirContains a b = a == b || a' == b' || (a'++"/") `isPrefixOf` b' a' = norm a b' = norm b -{- Converts a filename into a normalized, absolute path. -} +{- Converts a filename into a normalized, absolute path. + - + - Unlike Directory.canonicalizePath, this does not require the path + - already exists. -} absPath :: FilePath -> IO FilePath absPath file = do cwd <- getCurrentDirectory diff --git a/Utility/State.hs b/Utility/State.hs new file mode 100644 index 0000000000..c27f3c2610 --- /dev/null +++ b/Utility/State.hs @@ -0,0 +1,26 @@ +{- state monad support + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.State where + +import Control.Monad.State.Strict + +{- Modifies Control.Monad.State's state, forcing a strict update. + - This avoids building thunks in the state and leaking. + - Why it's not the default, I don't know. + - + - Example: changeState $ \s -> s { foo = bar } + -} +changeState :: MonadState s m => (s -> s) -> m () +changeState f = do + x <- get + put $! f x + +{- Gets a value from the internal state, selected by the passed value + - constructor. -} +getState :: MonadState s m => (s -> a) -> m a +getState = gets diff --git a/Utility/TempFile.hs b/Utility/TempFile.hs index 469d52e8ce..4dcbf1cca4 100644 --- a/Utility/TempFile.hs +++ b/Utility/TempFile.hs @@ -12,7 +12,7 @@ import System.IO import System.Posix.Process hiding (executeFile) import System.Directory -import Utility.Misc +import Utility.Exception import Utility.Path {- Runs an action like writeFile, writing to a temp file first and diff --git a/Utility/Url.hs b/Utility/Url.hs index e10b8a92a4..dfdebaf06a 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -7,6 +7,7 @@ module Utility.Url ( URLString, + check, exists, canDownload, download, @@ -14,25 +15,39 @@ module Utility.Url ( ) where import Control.Applicative +import Control.Monad import qualified Network.Browser as Browser import Network.HTTP import Network.URI +import Data.Maybe import Utility.SafeCommand import Utility.Path type URLString = String -{- Checks that an url exists and could be successfully downloaded. -} -exists :: URLString -> IO Bool +{- Checks that an url exists and could be successfully downloaded, + - also checking that its size, if available, matches a specified size. -} +check :: URLString -> Maybe Integer -> IO Bool +check url expected_size = handle <$> exists url + where + handle (False, _) = False + handle (True, Nothing) = True + handle (True, s) = expected_size == s + +{- Checks that an url exists and could be successfully downloaded, + - also returning its size if available. -} +exists :: URLString -> IO (Bool, Maybe Integer) exists url = case parseURI url of - Nothing -> return False + Nothing -> return (False, Nothing) Just u -> do r <- request u HEAD case rspCode r of - (2,_,_) -> return True - _ -> return False + (2,_,_) -> return (True, size r) + _ -> return (False, Nothing) + where + size = liftM read . lookupHeader HdrContentLength . rspHeaders canDownload :: IO Bool canDownload = (||) <$> inPath "wget" <*> inPath "curl" @@ -73,12 +88,32 @@ get url = {- Makes a http request of an url. For example, HEAD can be used to - check if the url exists, or GET used to get the url content (best for - - small urls). -} + - small urls). + - + - This does its own redirect following because Browser's is buggy for HEAD + - requests. + -} request :: URI -> RequestMethod -> IO (Response String) -request url requesttype = Browser.browse $ do - Browser.setErrHandler ignore - Browser.setOutHandler ignore - Browser.setAllowRedirects True - snd <$> Browser.request (mkRequest requesttype url :: Request_String) +request url requesttype = go 5 url where + go :: Int -> URI -> IO (Response String) + go 0 _ = error "Too many redirects " + go n u = do + rsp <- Browser.browse $ do + Browser.setErrHandler ignore + Browser.setOutHandler ignore + Browser.setAllowRedirects False + snd <$> Browser.request (mkRequest requesttype u :: Request_String) + case rspCode rsp of + (3,0,x) | x /= 5 -> redir (n - 1) u rsp + _ -> return rsp ignore = const $ return () + redir n u rsp = do + case retrieveHeaders HdrLocation rsp of + [] -> return rsp + (Header _ newu:_) -> + case parseURIReference newu of + Nothing -> return rsp + Just newURI -> go n newURI_abs + where + newURI_abs = fromMaybe newURI (newURI `relativeTo` u) diff --git a/debian/changelog b/debian/changelog index 4c8eb02d27..50d32ecb2c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,32 @@ +git-annex (3.20120124) UNRELEASED; urgency=low + + * Use the haskell IfElse library. + * Avoid repeated location log commits when a remote is receiving files. + Done by adding a oneshot mode, in which location log changes are + written to the journal, but not committed. Taking advantage of + git-annex's existing ability to recover in this situation. This is + used by git-annex-shell and other places where changes are made to + a remote's location log. + * S3: Fix irrefutable pattern failure when accessing encrypted S3 + credentials. + * addurl: Added a --file option, which can be used to specify what + file the url is added to. This can be used to override the default + filename that is used when adding an url, which is based on the url. + Or, when the file already exists, the url is recorded as another + location of the file. + * addurl: Normalize badly encoded urls. + * Fix teardown of stale cached ssh connections. + * When checking that an url has a key, verify that the Content-Length, + if available, matches the size of the key. + * addurl --fast: Verifies that the url can be downloaded (only getting + its head), and records the size in the key. + * Fixed to use the strict state monad, to avoid leaking all kinds of memory + due to lazy state update thunks when adding/fixing many files. + * Fixed some memory leaks that occurred when committing journal files. + * whereis: Prints the urls of files that the web special remote knows about. + + -- Joey Hess Tue, 24 Jan 2012 16:21:55 -0400 + git-annex (3.20120123~bpo60+1) squeeze-backports; urgency=low * Fixed build dependency that made the previous backport FTBFS on several diff --git a/debian/control b/debian/control index de5fb2b4a9..358bed5270 100644 --- a/debian/control +++ b/debian/control @@ -14,6 +14,7 @@ Build-Depends: libghc6-testpack-dev, libghc6-json-dev, libghc6-quickcheck2-dev, + libghc6-ifelse-dev, ikiwiki, perlmagick, git, diff --git a/debian/manpages b/debian/manpages deleted file mode 100644 index ca34203aa0..0000000000 --- a/debian/manpages +++ /dev/null @@ -1 +0,0 @@ -git-annex.1 diff --git a/doc/bugs.mdwn b/doc/bugs.mdwn index 2786e5bf74..b0837eb10b 100644 --- a/doc/bugs.mdwn +++ b/doc/bugs.mdwn @@ -2,3 +2,5 @@ This is git-annex's bug list. Link bugs to [[bugs/done]] when done. [[!inline pages="./bugs/* and !./bugs/done and !link(done) and !*/Discussion" actions=yes postform=yes show=0 archive=yes]] + +[[!edittemplate template=templates/bugtemplate match="bugs/*" silent=yes]] diff --git a/doc/bugs/copy_doesn__39__t_scale.mdwn b/doc/bugs/copy_doesn__39__t_scale.mdwn new file mode 100644 index 0000000000..adbd0660af --- /dev/null +++ b/doc/bugs/copy_doesn__39__t_scale.mdwn @@ -0,0 +1,35 @@ +It seems that git-annex copies every individual file in a separate +transaction. This is quite costly for mass transfers: each file involves a +separate rsync invocation and the creation of a new commit. Even with a +meager thousand files or so in the annex, I have to wait for fifteen +minutes to copy the contents to another disk, simply because every +individual file involves some disk thrashing. Also, it seems suspicious +that the git-annex branch would get a thousands commits of history from the +simple procedure of copying everything to a new repository. Surely it would +be better to first copy everything and then create only a single commit +that registers the changes to the files' availability? + +> git-annex is very careful to commit as infrequently as possible, +> and the current version makes *1* commit after all the copies are +> complete, even if it transferred a billion files. The only overhead +> incurred for each file is writing a journal file. +> You must have an old version. +> --[[Joey]] + +(I'm also not quite clear on why rsync is being used when both repositories +are local. It seems to be just overhead.) + +> Even when copying to another disk it's often on +> some slow bus, and the file is by definition large. So it's +> nice to support resumes of interrupted transfers of files. +> Also because rsync has a handy progress display that is hard to get with cp. +> +> (However, if the copy is to another directory in the same disk, it does +> use cp, and even supports really fast copies on COW filesystems.) +> --[[Joey]] + +--- + +Oneshot mode is now implemented, making git-annex-shell and other +short lifetime processes not bother with committing changes. +[[done]] --[[Joey]] diff --git a/doc/bugs/copy_doesn__39__t_scale/comment_1_7c12499c9ac28a9883c029f8c659eb57._comment b/doc/bugs/copy_doesn__39__t_scale/comment_1_7c12499c9ac28a9883c029f8c659eb57._comment new file mode 100644 index 0000000000..749b3ba108 --- /dev/null +++ b/doc/bugs/copy_doesn__39__t_scale/comment_1_7c12499c9ac28a9883c029f8c659eb57._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawk6QAwUsFHpr3Km1yQbg8hf3S7RDYf7hX4" + nickname="Lauri" + subject="comment 1" + date="2012-01-28T00:17:37Z" + content=""" +To me it very much seems that a commit per file is indeed created at the remote end, although not at the local end. See the following transcript: . + + +"""]] diff --git a/doc/bugs/copy_doesn__39__t_scale/comment_2_f85d8023cdbc203bb439644cf7245d4e._comment b/doc/bugs/copy_doesn__39__t_scale/comment_2_f85d8023cdbc203bb439644cf7245d4e._comment new file mode 100644 index 0000000000..9a2bd92fa3 --- /dev/null +++ b/doc/bugs/copy_doesn__39__t_scale/comment_2_f85d8023cdbc203bb439644cf7245d4e._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2012-01-28T19:32:36Z" + content=""" +Ah, I see, I was not thinking about the location log update that's done on the remote side. + +For transfers over ssh, that's a separate git-annex-shell invoked per change. For local-local transfers, it's all done in a single process but it spins up a state to handle the remote and then immediately shuts it down, also generating a commit. + +In either case, I think there is a nice fix. Since git-annex *does* have a journal nowadays, and goes to all the bother to +support recovery if a process was interrupted and journalled changes that did not get committed, there's really no reason in either of these cases for the remote end to do anything more than journal the change. The next time git-annex is actually run on the remote, and needs to look up location information, it will merge the journalled changes into the branch, in a single commit. + +My only real concern is that some remotes might *never* have git-annex run in them directly, and would just continue to accumulate journal files forever. Although due to the way the journal is structured, it can have, at a maximum, the number of files in the git-annex branch. However, the number of files in it is expected to be relatively smal and it might get a trifle innefficient, as it lacks directory hashing. These performance problems could certainly be dealt with if they do turn out to be a problem. +"""]] diff --git a/doc/bugs/copy_doesn__39__t_scale/comment_3_4592765c3d77bb5664b8d16867e9d79c._comment b/doc/bugs/copy_doesn__39__t_scale/comment_3_4592765c3d77bb5664b8d16867e9d79c._comment new file mode 100644 index 0000000000..aa9bf1e45a --- /dev/null +++ b/doc/bugs/copy_doesn__39__t_scale/comment_3_4592765c3d77bb5664b8d16867e9d79c._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawk6QAwUsFHpr3Km1yQbg8hf3S7RDYf7hX4" + nickname="Lauri" + subject="comment 3" + date="2012-01-29T01:51:35Z" + content=""" +That sounds just fine, but indeed my use case was a bare backup/transfer repository that is meant to always be only at the remote end of git-annex operations. So why not as well do a single commit after everything has been copied and journaled? That's what's done at the other end too, after all. Or, if commits are to be minimized, just stage the journal into the index before finishing, but don't commit it yet? + +(I would actually prefer this mode of usage for other git-annex operations, too. In git you can add stuff little by little and commit them all in one go. In git-annex the add immediately creates a commit, which is unexpected and a bit annoying.) + +"""]] diff --git a/doc/bugs/git_annex_add_memory_leak.mdwn b/doc/bugs/git_annex_add_memory_leak.mdwn new file mode 100644 index 0000000000..891ba318f6 --- /dev/null +++ b/doc/bugs/git_annex_add_memory_leak.mdwn @@ -0,0 +1,42 @@ +For the record, `git annex add` has had a series of memory leaks. +Mostly these are minor -- until you need to check in a few +million files in a single operation. + +If this happens to you, git-annex will run out of memory and stop. +(Generally well before your system runs out of memory, since it has some +built-in ulimits.) You can recover by just re-running the `git annex add` +-- it will automatically pick up where it left off. + +A history of the leaks: + +* Originally, `git annex add` remembered all the files + it had added, and fed them to git at the end. Of course + that made its memory use grow, so it was fixed to periodically + flush its buffer. Fixed in version 0.20110417. + +* Something called a "lazy state monad" caused "thunks" to build + up and memory to leak. Also affected other git annex commands + than `add`. Adding files using a SHA* backend hit the worst. + Fixed in versions afer 3.20120123. + +* Committing journal files turned out to have another memory leak. + After adding a lot of files ran out of memory, this left the journal + behind and could affect other git-annex commands. Fixed in versions afer + 3.20120123. + +* Something is still causing a slow leak when adding files. + I tested by adding many copies of the whole linux kernel + tree into the annex using the WORM backend, and once + it had added 1 million files, git-annex used ~100 mb of ram. + That's 100 bytes leaked per file on average .. roughly the + size of a filename? It's worth noting that `git add` uses more memory + than that in such a large tree. + **not fixed yet** + +* (Note that `git ls-files --others`, which is used to find files to add, + also uses surpsisingly large amounts + of memory when you have a lot of files. It buffers + the entire list, so it can compare it with the files in the index, + before outputting anything. + This is Not Our Problem, but I'm sure the git developers + would appreciate a patch that fixes it.) diff --git a/doc/bugs/problems_with_utf8_names.mdwn b/doc/bugs/problems_with_utf8_names.mdwn index d6dc6ca3c3..aeeb16be65 100644 --- a/doc/bugs/problems_with_utf8_names.mdwn +++ b/doc/bugs/problems_with_utf8_names.mdwn @@ -1,3 +1,16 @@ +This bug is reopened to track some new UTF-8 filename issues caused by GHC +7.4. In this version of GHC, git-annex's hack to support filenames in any +encoding no longer works. Even unicode filenames fail to work when +git-annex is built with 7.4. --[[Joey]] + +This bug is now fixed in current master. Once again, git-annex will work +for all filename encodings, and all system encodings. It will +only build with the new GHC. [[done]] --[[Joey]] + +---- + +Old, now fixed bug report follows: + There are problems with displaying filenames in UTF8 encoding, as shown here: $ echo $LANG @@ -45,7 +58,7 @@ It looks like the common latin1-to-UTF8 encoding. Functionality other than otupu > outputting a filename (assuming the filename is encoded using the > user's configured encoding), and allow haskell's output encoding to then > encode it according to the user's locale configuration. -> > This is now [[implemented|done]]. I'm not very happy that I have to watch +> > This is now implemented. I'm not very happy that I have to watch > > out for any place that a filename is output and call `filePathToString` > > on it, but there are really not too many such places in git-annex. > > @@ -66,39 +79,3 @@ It looks like the common latin1-to-UTF8 encoding. Functionality other than otupu > > On second thought, I switched to this. Any decoding of a filename > > is going to make someone unhappy; the previous approach broke > > non-utf8 filenames. - ----- - -Simpler test case: - -
-import Codec.Binary.UTF8.String
-import System.Environment
-
-main = do
-        args <- getArgs
-        let file = decodeString $ head args
-        putStrLn $ "file is: " ++ file
-        putStr =<< readFile file
-
- -If I pass this a filename like 'ü', it will fail, and notice -the bad encoding of the filename in the error message: - -
-$ echo hi > ü; runghc foo.hs ü
-file is: ü
-foo.hs: �: openFile: does not exist (No such file or directory)
-
- -On the other hand, if I remove the decodeString, it prints the filename -wrong, while accessing it right: - -
-$ runghc foo.hs ü
-file is: üa
-hi
-
- -The only way that seems to consistently work is to delay decoding the -filename to places where it's output. But then it's easy to miss some. diff --git a/doc/bugs/problems_with_utf8_names/comment_5_519cda534c7aea7f5ad5acd3f76e21fa._comment b/doc/bugs/problems_with_utf8_names/comment_5_519cda534c7aea7f5ad5acd3f76e21fa._comment new file mode 100644 index 0000000000..96b0ffed04 --- /dev/null +++ b/doc/bugs/problems_with_utf8_names/comment_5_519cda534c7aea7f5ad5acd3f76e21fa._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawk6QAwUsFHpr3Km1yQbg8hf3S7RDYf7hX4" + nickname="Lauri" + subject="comment 5" + date="2012-01-26T22:13:18Z" + content=""" +I also encountered Adam's bug. The problem seems to be that communication with the git process is done with `Char8`-bytestrings. So, when `L.unpack` is called, all filenames that git outputs (with `ls-files` or `ls-tree`) are interpreted to be in latin-1, which wreaks havoc if they are really in UTF-8. + +I suspect that it would be enough to just switch to standard `String`s (or `Data.Text.Text`) instead of bytestrings for textual data, and to `Word8`-bytestrings for pure binary data. GHC should nowadays handle locale-dependent encoding of `String`s transparently. + +"""]] diff --git a/doc/bugs/problems_with_utf8_names/comment_6_52e0bfff2b177b6f92e226b25d2f3ff1._comment b/doc/bugs/problems_with_utf8_names/comment_6_52e0bfff2b177b6f92e226b25d2f3ff1._comment new file mode 100644 index 0000000000..093616d476 --- /dev/null +++ b/doc/bugs/problems_with_utf8_names/comment_6_52e0bfff2b177b6f92e226b25d2f3ff1._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 6" + date="2012-01-27T21:00:06Z" + content=""" +Lauri, what version of GHC do you have that behaves this way? 7.0.4 does not. +"""]] diff --git a/doc/bugs/problems_with_utf8_names/comment_7_0cc588f787d6eecfa19a8f6cee4b07b5._comment b/doc/bugs/problems_with_utf8_names/comment_7_0cc588f787d6eecfa19a8f6cee4b07b5._comment new file mode 100644 index 0000000000..5a929940d4 --- /dev/null +++ b/doc/bugs/problems_with_utf8_names/comment_7_0cc588f787d6eecfa19a8f6cee4b07b5._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawk6QAwUsFHpr3Km1yQbg8hf3S7RDYf7hX4" + nickname="Lauri" + subject="comment 7" + date="2012-01-28T00:21:40Z" + content=""" +7.2. nomeata already explained the issue. I got utf-8 filenames to work on a utf-8 locale by switching from Char8-bytestrings to UTF8-bytestrings, and adding `hSetEncoding h localeEncoding` to suitable places. Making things work properly with an arbitrary locale encoding would be more complicated. +"""]] diff --git a/doc/bugs/problems_with_utf8_names/comment_8_ff5c6da9eadfee20c18c86b648a62c47._comment b/doc/bugs/problems_with_utf8_names/comment_8_ff5c6da9eadfee20c18c86b648a62c47._comment new file mode 100644 index 0000000000..dcfd59bce9 --- /dev/null +++ b/doc/bugs/problems_with_utf8_names/comment_8_ff5c6da9eadfee20c18c86b648a62c47._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 8" + date="2012-01-28T19:40:34Z" + content=""" +Lauri a scratch patch would be very helpful. Encoding stuff makes my head explode. + +However, I am very worried by haskell's changes WRT unicode and filenames. Based on user input, git-annex users like to use it on diverse sets of files, with diverse and ill-defined encodings. Faffing about with converting between encodings seems likely to speactacularly fail. +"""]] diff --git a/doc/bugs/show_version_without_having_to_be_in_a_git_repo.mdwn b/doc/bugs/show_version_without_having_to_be_in_a_git_repo.mdwn new file mode 100644 index 0000000000..98b9ced228 --- /dev/null +++ b/doc/bugs/show_version_without_having_to_be_in_a_git_repo.mdwn @@ -0,0 +1,11 @@ +It'd be nice to be able to run "git annex version" -- and maybe some other +commands, like "git annex" itself for the help text, without having to be +inside a git repo. Right now it requires you to be in a git repo even if +it's not a git-annex repo. + +> You need a newer verison of git-annex. --[[Joey]] + + joey@gnu:/>git annex version + git-annex version: 3.20120124 + +[[done]] diff --git a/doc/download.mdwn b/doc/download.mdwn index e1257d2618..120e0a517d 100644 --- a/doc/download.mdwn +++ b/doc/download.mdwn @@ -15,3 +15,21 @@ From time to time, releases of git-annex are uploaded Some operating systems include git-annex in easily prepackaged form and others need some manual work. See [[install]] for details. + +## git branches + +The git repository has some branches: + +* `debian-stable` contains the latest backport of git-annex to Debian + stable. +* `no-s3` disables the S3 special remote, for systems that lack the + necessary haskell library. +* `old-monad-control` is for systems that don't have a newer monad-control + library. +* `tweak-fetch` adds support for the git tweak-fetch hook, which has + been proposed and implemented but not yet accepted into git. +* `ghc7.0` supports versions of ghc older than 7.4, which + had a major change to filename encoding. +* `setup` contains configuration for this website +* `pristine-tar` contains [pristine-tar](http://kitenet.net/~joey/code/pristine-tar) + data to create tarballs of any past git-annex release. diff --git a/doc/forum/How_to_expire_old_versions_of_files_that_have_been_edited__63__.mdwn b/doc/forum/How_to_expire_old_versions_of_files_that_have_been_edited__63__.mdwn new file mode 100644 index 0000000000..f06135c24e --- /dev/null +++ b/doc/forum/How_to_expire_old_versions_of_files_that_have_been_edited__63__.mdwn @@ -0,0 +1,7 @@ +My annex contains several large files that I have unlocked, edited, and committed again, i.e. the annex contains the version history of those files. However, I don't want the history -- keeping the latest version is good enough for me. Running `git annex unused` won't detect those old versions, though, because they aren't unused as old Git revisions still refer to them. So I wonder: + +1. What is the best way to get rid of the old versions of files in the annex? + +2. What is the best way to detect old versions of files in the annex? + +I guess, I could run `git rebase -i` to squash commits to those files into one commit, thereby getting rid of the references to the old copies, but that approach feels awkward and error prone. Is anyone aware of a better way? diff --git a/doc/forum/How_to_expire_old_versions_of_files_that_have_been_edited__63__/comment_1_dccf4dc4483d08e5e2936b2cadeafeaf._comment b/doc/forum/How_to_expire_old_versions_of_files_that_have_been_edited__63__/comment_1_dccf4dc4483d08e5e2936b2cadeafeaf._comment new file mode 100644 index 0000000000..ee4fe2e6ce --- /dev/null +++ b/doc/forum/How_to_expire_old_versions_of_files_that_have_been_edited__63__/comment_1_dccf4dc4483d08e5e2936b2cadeafeaf._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://peter-simons.myopenid.com/" + ip="77.186.179.173" + subject="comment 1" + date="2012-02-09T18:53:00Z" + content=""" +Sorry for commmenting on my own question ... I think I just figured out that `git annex unused` *does* in fact do what I want. When I tried it, it just didn't show the obsolete versions of the files I edited because I hadn't yet synchronized all repositories, so that was why the obsolete versions were still considered used. +"""]] diff --git a/doc/forum/How_to_expire_old_versions_of_files_that_have_been_edited__63__/comment_2_5710294c1c8652c12b6df2233255a45e._comment b/doc/forum/How_to_expire_old_versions_of_files_that_have_been_edited__63__/comment_2_5710294c1c8652c12b6df2233255a45e._comment new file mode 100644 index 0000000000..576093a87f --- /dev/null +++ b/doc/forum/How_to_expire_old_versions_of_files_that_have_been_edited__63__/comment_2_5710294c1c8652c12b6df2233255a45e._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 2" + date="2012-02-09T19:42:28Z" + content=""" +Yes, contents are still considered used while tags or refs refer to them. Including remote tracking branches like `remotes/origin/master` +"""]] diff --git a/doc/forum/cloud_services_to_support.mdwn b/doc/forum/cloud_services_to_support.mdwn new file mode 100644 index 0000000000..e268bc1d86 --- /dev/null +++ b/doc/forum/cloud_services_to_support.mdwn @@ -0,0 +1,16 @@ +git-annex can already be used to store data in several cloud services: +Amazon S3, rsync.net, Tahoe-LAFFS, The Internet Archive. + +I would like to support as many other cloud services as possible/reasonable. + +* [[swift|todo/wishlist:_swift_backend]] +* Dropbox (I had been reluctant to go there due to it using a non-free client, + which I have no interest in installing, but there is actually an API, + and already a + [haskell module to use it](http://hackage.haskell.org/package/dropbox-sdk). + Would need to register for an API key. + . + Annoyingly, Dropbox reviews each app before granting it production status. + Whoops my interest level dropped by 99%.) + +Post others in the comments. --[[Joey]] diff --git a/doc/forum/fsck_gives_false_positives.mdwn b/doc/forum/fsck_gives_false_positives.mdwn new file mode 100644 index 0000000000..2fae57c4ed --- /dev/null +++ b/doc/forum/fsck_gives_false_positives.mdwn @@ -0,0 +1,6 @@ +Hi, + +I use git-annex 3.20120123 on a debian-testing amd-64 machine with software RAID6 and LVM2 on it. I needed to move the whole `/home` directory to another LV (the new LV is on encrypted PV, the old LV is encrypted and not properly aligned; I'm changing from encrypted `/home` only to encrypted everything except `/boot`), so I have used the `rsync -aAXH` from a `ro` mounted `/home` to a new LV mounted on `/mnt/home_2`. After the move was complete I run the `git annex fsck` on my (4TB of) data. The fsck finds some files bad, and moves them to the `..../bad` directory. So far so good, this is how it should be, right? But then- I have a file with sha1sum of all my files. So - I checked the 'bad' file against that. It was OK. Then I computed the SHA256 of the file - this is used by `git annex fsck`. It was OK, too. So how did it happen, that the file was marked as bad? Do I miss something here? Could it be related to the hardware (HDDs) and silent data corruption? Or is it the undesirable effect of rsync? Or maybe the fsck is at fault here? + +Any ideas? + diff --git a/doc/forum/fsck_gives_false_positives/comment_1_b91070218b9d5fb687eeee1f244237ad._comment b/doc/forum/fsck_gives_false_positives/comment_1_b91070218b9d5fb687eeee1f244237ad._comment new file mode 100644 index 0000000000..c65eaf51d9 --- /dev/null +++ b/doc/forum/fsck_gives_false_positives/comment_1_b91070218b9d5fb687eeee1f244237ad._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2012-02-14T16:58:33Z" + content=""" +Well, it should only move files to `.git/annex/bad/` if their filesize is wrong, or their checksum is wrong. + +You can try moving a file out of `.git/annex/bad/` and re-run fsck and see if it fails it again. (And if it does, paste in a log!) + +To do that -- +Suppose you have a file `.git/annex/bad/SHA256-s33--5dc45521382f1c7974d9dbfcff1246370404b952` and you know that file `foobar` was supposed to have that content (you can check that `foobar` is a symlink to that SHA value). Then reinject it: + +`git annex reinject .git/annex/bad/SHA256-s33--5dc45521382f1c7974d9dbfcff1246370404b952 foobar` +"""]] diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 148b6336de..9232bf0208 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -145,9 +145,14 @@ subdirectories). * addurl [url ...] - Downloads each url to a file, which is added to the annex. + Downloads each url to its own file, which is added to the annex. - To avoid immediately downloading the url, specify --fast + To avoid immediately downloading the url, specify --fast. + + To specify what file the url is added to, specify --file. This changes + the behavior; now all the specified urls are recorded as alternate + locations from which the file can be downloaded. In this mode, addurl + can be used both to add new files, or to add urls to existing files. # REPOSITORY SETUP COMMANDS diff --git a/doc/install.mdwn b/doc/install.mdwn index ab029c8d34..ef24569e29 100644 --- a/doc/install.mdwn +++ b/doc/install.mdwn @@ -32,6 +32,7 @@ To build and use git-annex, you will need: * [HTTP](http://hackage.haskell.org/package/HTTP) * [hS3](http://hackage.haskell.org/package/hS3) * [json](http://hackage.haskell.org/package/json) + * [IfElse](http://hackage.haskell.org/package/IfElse) * Shell commands * [git](http://git-scm.com/) * [uuid](http://www.ossp.org/pkg/lib/uuid/) diff --git a/doc/news/Presentation_at_FOSDEM.mdwn b/doc/news/Presentation_at_FOSDEM.mdwn new file mode 100644 index 0000000000..48daf2678d --- /dev/null +++ b/doc/news/Presentation_at_FOSDEM.mdwn @@ -0,0 +1,4 @@ +git-annex will be briefly presented at FOSDEM, on Sunday February 4th at 15:40. +[Details](http://fosdem.org/2012/schedule/event/gitannex). + +Thanks to Richard Hartmann for making this presentation. diff --git a/doc/news/version_3.20120123.mdwn b/doc/news/version_3.20120123.mdwn new file mode 100644 index 0000000000..4eb37ef2da --- /dev/null +++ b/doc/news/version_3.20120123.mdwn @@ -0,0 +1,27 @@ +News for git-annex 3.20120123: + + There was a bug in the handling of directory special remotes that + could cause partial file contents to be stored in them. If you use + a directory special remote, you should fsck it, to avoid potential + data loss. + Example: git annex fsck --from mydirectory + +git-annex 3.20120123 released with [[!toggle text="these changes"]] +[[!toggleable text=""" + * fsck --from: Fscking a remote is now supported. It's done by retrieving + the contents of the specified files from the remote, and checking them, + so can be an expensive operation. Still, if the remote is a special + remote, or a git repository that you cannot run fsck in locally, it's + nice to have the ability to fsck it. + * If you have any directory special remotes, now would be a good time to + fsck them, in case you were hit by the data loss bug fixed in the + previous release! + * fsck --from remote --fast: Avoids expensive file transfers, at the + expense of not checking file size and/or contents. + * Ssh connection caching is now enabled automatically by git-annex. + Only one ssh connection is made to each host per git-annex run, which + can speed some things up a lot, as well as avoiding repeated password + prompts. Concurrent git-annex processes also share ssh connections. + Cached ssh connections are shut down when git-annex exits. + * To disable the ssh caching (if for example you have your own broader + ssh caching configuration), set annex.sshcaching=false."""]] \ No newline at end of file diff --git a/doc/templates/bugtemplate.mdwn b/doc/templates/bugtemplate.mdwn new file mode 100644 index 0000000000..2d35c8f6fb --- /dev/null +++ b/doc/templates/bugtemplate.mdwn @@ -0,0 +1,12 @@ +What steps will reproduce the problem? + + +What is the expected output? What do you see instead? + + +What version of git-annex are you using? On what operating system? + + +Please provide any additional information below. + + diff --git a/doc/tips/assume-unstaged.mdwn b/doc/tips/assume-unstaged.mdwn new file mode 100644 index 0000000000..ef74d9bd40 --- /dev/null +++ b/doc/tips/assume-unstaged.mdwn @@ -0,0 +1,31 @@ +[[!meta title="using assume-unstages to speed up git with large trees of annexed files"]] + +Git update-index's assume-unstaged feature can be used to speed +up `git status` and stuff by not statting the whole tree looking for changed +files. + +This feature works quite well with git-annex. Especially because git +annex's files are immutable, so arn't going to change out from under it, +this is a nice fit. If you have a very large tree and `git status` is +annoyingly slow, you can turn it on: + + git config core.ignoreStat true + +When git mv and git rm are used, those changes *do* get noticed, even +on assume-unchanged files. When new files are added, eg by `git annex add`, +they are also noticed. + +There are two gotchas. Both occur because `git add` does not stage +assume-unchanged files. + +1. When an annexed file is moved to a different directory, it updates + the symlink, and runs `git add` on it. So the file will move, + but the changed symlink will not be noticed by git and it will commit a + dangling symlink. +2. When using `git annex migrate`, it changes the symlink and `git adds` + it. Again this won't be committed. + +These can be worked around by running `git update-index --really-refresh` +after performing such operations. I hope that `git add` will be changed +to stage changes to assume-unchanged files, which would remove this +only complication. --[[Joey]] diff --git a/doc/tips/using_the_web_as_a_special_remote.mdwn b/doc/tips/using_the_web_as_a_special_remote.mdwn index 8009927a49..a151f99332 100644 --- a/doc/tips/using_the_web_as_a_special_remote.mdwn +++ b/doc/tips/using_the_web_as_a_special_remote.mdwn @@ -8,10 +8,10 @@ The web can be used as a [[special_remote|special_remotes]] too. Now the file is downloaded, and has been added to the annex like any other file. So it can be renamed, copied to other repositories, and so on. -Note that git-annex assumes that, if the web site does not 404, the file is -still present on the web, and this counts as one [[copy|copies]] of the -file. So it will let you remove your last copy, trusting it can be -downloaded again: +Note that git-annex assumes that, if the web site does not 404, and has the +right file size, the file is still present on the web, and this counts as +one [[copy|copies]] of the file. So it will let you remove your last copy, +trusting it can be downloaded again: # git annex drop example.com_video.mpeg drop example.com_video.mpeg (checking http://example.com/video.mpeg) ok diff --git a/doc/todo/fsck_special_remotes.mdwn b/doc/todo/fsck_special_remotes.mdwn index c81c56c856..7196baafe6 100644 --- a/doc/todo/fsck_special_remotes.mdwn +++ b/doc/todo/fsck_special_remotes.mdwn @@ -9,3 +9,5 @@ copying the file, just dropping a symlink, etc. The WORM backend doesn't care about file content, so it would be nice to avoid transferring the content at all, and only send the size. + +> [[done]] --[[Joey]] diff --git a/doc/todo/windows_support.mdwn b/doc/todo/windows_support.mdwn index 8df792fd6e..c64e6fce5c 100644 --- a/doc/todo/windows_support.mdwn +++ b/doc/todo/windows_support.mdwn @@ -1,25 +1,16 @@ -short answer: no +Can it be built on Windows? -Long answer, quoting from a mail to someone else: +short answer: not yet -Well, I can tell you that it assumes a POSIX system, both in available -utilities and system calls, So you'd need to use cygwin or something -like that. (Perhaps you already are for git, I think git also assumes a -POSIX system.) So you need a Haskell that can target that. What this -page refers to as "GHC-Cygwin": - -I don't know where to get one. Did find this: - +First, you need to get some unix utilities for windows. Git of course. +Also rsync, and a `cp` command that understands at least `cp -p`, and +`uuid`, and `xargs` and `sha1sum`. Note that some of these could be +replaced with haskell libraries to some degree. -(There are probably also still some places where it assumes / as a path -separator, although I fixed some. Probably almost all are fixed now.) +There are probably still some places where it assumes / as a path +separator, although I fixed probably almost all by now. -FWIW, git-annex works fine on OS X and other fine proprietary unixen. ;P ---[[Joey]] - ----- - -Alternatively, windows versions of these functions could be found, +Then windows versions of these functions could be found, which are all the ones that need POSIX, I think. A fair amount of this, the stuff to do with signals and users, could be empty stubs in windows. The file manipulation, particularly symlinks, would probably be the main @@ -63,3 +54,8 @@ sigCHLD sigINT unionFileModes + +A good starting point is +. However, note +that its implementations of stuff like `createSymbolicLink` are stubs. +--[[Joey]] diff --git a/git-annex.cabal b/git-annex.cabal index cabc5b1c24..d91818dab1 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -31,7 +31,7 @@ Executable git-annex Build-Depends: MissingH, hslogger, directory, filepath, unix, containers, utf8-string, network, mtl, bytestring, old-locale, time, pcre-light, extensible-exceptions, dataenc, SHA, process, json, HTTP, - base < 5, QuickCheck >= 2.1 + base < 5, IfElse, QuickCheck >= 2.1 Executable git-annex-shell Main-Is: git-annex-shell.hs diff --git a/test.hs b/test.hs index 3aef83e879..245dd6706a 100644 --- a/test.hs +++ b/test.hs @@ -11,10 +11,8 @@ import Test.QuickCheck import System.Posix.Directory (changeWorkingDirectory) import System.Posix.Files -import Control.Exception (bracket_, bracket, throw) -import System.IO.Error import System.Posix.Env -import qualified Control.Exception.Extensible as E +import Control.Exception.Extensible import qualified Data.Map as M import System.IO.HVFS (SystemFS(..)) import Text.JSON @@ -131,7 +129,7 @@ test_init = "git-annex init" ~: TestCase $ innewrepo $ do reponame = "test repo" test_add :: Test -test_add = "git-annex add" ~: TestList [basic, sha1dup, subdirs] +test_add = "git-annex add" ~: TestList [basic, sha1dup, sha1unicode, subdirs] where -- this test case runs in the main repo, to set up a basic -- annexed file that later tests will use @@ -158,6 +156,10 @@ test_add = "git-annex add" ~: TestList [basic, sha1dup, subdirs] git_annex "add" [sha1annexedfiledup, "--backend=SHA1"] @? "add of second file with same SHA1 failed" annexed_present sha1annexedfiledup annexed_present sha1annexedfile + sha1unicode = TestCase $ intmpclonerepo $ do + writeFile sha1annexedfileunicode $ content sha1annexedfileunicode + git_annex "add" [sha1annexedfileunicode, "--backend=SHA1"] @? "add of unicode filename failed" + annexed_present sha1annexedfileunicode subdirs = TestCase $ intmpclonerepo $ do createDirectory "dir" writeFile "dir/foo" $ content annexedfile @@ -691,7 +693,7 @@ test_crypto = "git-annex crypto" ~: intmpclonerepo $ when Build.SysConfig.gpg $ git_annex :: String -> [String] -> IO Bool git_annex command params = do -- catch all errors, including normally fatal errors - r <- E.try (run)::IO (Either E.SomeException ()) + r <- try (run)::IO (Either SomeException ()) case r of Right _ -> return True Left _ -> return False @@ -757,7 +759,7 @@ indir dir a = do -- any type of error and change back to cwd before -- rethrowing. r <- bracket_ (changeToTmpDir dir) (changeWorkingDirectory cwd) - (E.try (a)::IO (Either E.SomeException ())) + (try (a)::IO (Either SomeException ())) case r of Right () -> return () Left e -> throw e @@ -828,14 +830,14 @@ checkunwritable f = do checkwritable :: FilePath -> Assertion checkwritable f = do - r <- try $ writeFile f $ content f + r <- tryIO $ writeFile f $ content f case r of Left _ -> assertFailure $ "unable to modify " ++ f Right _ -> return () checkdangling :: FilePath -> Assertion checkdangling f = do - r <- try $ readFile f + r <- tryIO $ readFile f case r of Left _ -> return () -- expected; dangling link Right _ -> assertFailure $ f ++ " was not a dangling link as expected" @@ -919,6 +921,9 @@ sha1annexedfile = "sha1foo" sha1annexedfiledup :: String sha1annexedfiledup = "sha1foodup" +sha1annexedfileunicode :: String +sha1annexedfileunicode = "foo¡" + ingitfile :: String ingitfile = "bar" @@ -928,6 +933,7 @@ content f | f == ingitfile = "normal file content" | f == sha1annexedfile ="sha1 annexed file content" | f == sha1annexedfiledup = content sha1annexedfile + | f == sha1annexedfileunicode ="sha1 annexed file content ¡ünicodé!" | f == wormannexedfile = "worm annexed file content" | otherwise = "unknown file " ++ f