diff --git a/Assistant.hs b/Assistant.hs index aa1399c031..c7ca98ee91 100644 --- a/Assistant.hs +++ b/Assistant.hs @@ -23,7 +23,7 @@ import Assistant.Threads.TransferWatcher import Assistant.Threads.Transferrer import Assistant.Threads.SanityChecker import Assistant.Threads.Cronner -import Assistant.Threads.ProblemChecker +import Assistant.Threads.ProblemFixer #ifdef WITH_CLIBS import Assistant.Threads.MountWatcher #endif @@ -130,7 +130,7 @@ startDaemon assistant foreground startdelay listenhost startbrowser = do , assist $ daemonStatusThread , assist $ sanityCheckerDailyThread , assist $ sanityCheckerHourlyThread - , assist $ problemCheckerThread urlrenderer + , assist $ problemFixerThread urlrenderer #ifdef WITH_CLIBS , assist $ mountWatcherThread #endif diff --git a/Assistant/Repair.hs b/Assistant/Repair.hs index 06fa7202fc..1f54451251 100644 --- a/Assistant/Repair.hs +++ b/Assistant/Repair.hs @@ -118,13 +118,16 @@ runRepair u mrmt destructiverepair = do - the size changed, delay for another minute, and so on. This will at - least work to detect is another machine is writing out a new index - file, since git does so by writing the new content to index.lock. + - + - Returns true if locks were cleaned up. -} -repairStaleGitLocks :: Git.Repo -> Assistant () +repairStaleGitLocks :: Git.Repo -> Assistant Bool repairStaleGitLocks r = do lockfiles <- filter (not . isInfixOf "gc.pid") . filter (".lock" `isSuffixOf`) <$> liftIO (findgitfiles r) repairStaleLocks lockfiles + return $ not $ null lockfiles where findgitfiles = dirContentsRecursiveSkipping (== dropTrailingPathSeparator annexDir) . Git.localGitDir repairStaleLocks :: [FilePath] -> Assistant () diff --git a/Assistant/RepoProblem.hs b/Assistant/RepoProblem.hs index d2e5a5cf19..6913fefc62 100644 --- a/Assistant/RepoProblem.hs +++ b/Assistant/RepoProblem.hs @@ -8,16 +8,27 @@ module Assistant.RepoProblem where import Assistant.Common +import Assistant.Types.RepoProblem import Utility.TList import Control.Concurrent.STM -{- Gets all repositories that have problems. - - Blocks until there is at least one. -} -getRepoProblems :: Assistant [UUID] -getRepoProblems = (atomically . getTList) <<~ repoProblemChan +{- Gets all repositories that have problems. Blocks until there is at + - least one. -} +getRepoProblems :: Assistant [RepoProblem] +getRepoProblems = nubBy sameRepoProblem + <$> (atomically . getTList) <<~ repoProblemChan -{- Indicates that there was a problem accessing a repo, and the problem - - appears to not be a transient (eg network connection) problem. -} -repoHasProblem :: UUID -> Assistant () -repoHasProblem r = (atomically . flip consTList r) <<~ repoProblemChan +{- Indicates that there was a problem with a repository, and the problem + - appears to not be a transient (eg network connection) problem. + - + - If the problem is able to be repaired, the passed action will be run. + - (However, if multiple problems are reported with a single repository, + - only a single action will be run.) + -} +repoHasProblem :: UUID -> Assistant () -> Assistant () +repoHasProblem u afterrepair = do + rp <- RepoProblem + <$> pure u + <*> asIO afterrepair + (atomically . flip consTList rp) <<~ repoProblemChan diff --git a/Assistant/Sync.hs b/Assistant/Sync.hs index 4a021df2e5..f7656f52df 100644 --- a/Assistant/Sync.hs +++ b/Assistant/Sync.hs @@ -67,7 +67,7 @@ reconnectRemotes notifypushes rs = void $ do failedrs <- syncAction rs' (const go) forM_ failedrs $ \r -> whenM (liftIO $ Remote.checkAvailable False r) $ - repoHasProblem (Remote.uuid r) + repoHasProblem (Remote.uuid r) (syncRemote r) mapM_ signal $ filter (`notElem` failedrs) rs' where gitremotes = filter (notspecialremote . Remote.repo) rs diff --git a/Assistant/Threads/ProblemChecker.hs b/Assistant/Threads/ProblemChecker.hs deleted file mode 100644 index 66dcadfffc..0000000000 --- a/Assistant/Threads/ProblemChecker.hs +++ /dev/null @@ -1,55 +0,0 @@ -{- git-annex assistant thread to handle reported problems with repositories - - - - Copyright 2013 Joey Hess - - - - Licensed under the GNU GPL version 3 or higher. - -} - -module Assistant.Threads.ProblemChecker ( - problemCheckerThread -) where - -import Assistant.Common -import Utility.ThreadScheduler -import Assistant.Types.UrlRenderer -import Assistant.Alert -import Remote -import qualified Types.Remote as Remote -import qualified Git.Fsck -import Assistant.Repair -import qualified Git -import Assistant.RepoProblem -import Assistant.Sync -import Annex.UUID - -{- Waits for problems with a repo, and tries to fsck the repo and repair - - the problem. -} -problemCheckerThread :: UrlRenderer -> NamedThread -problemCheckerThread urlrenderer = namedThread "ProblemChecker" $ forever $ do - mapM_ (handleProblem urlrenderer) - =<< nub <$> getRepoProblems - liftIO $ threadDelaySeconds (Seconds 60) - -handleProblem :: UrlRenderer -> UUID -> Assistant () -handleProblem urlrenderer u = ifM ((==) u <$> liftAnnex getUUID) - ( handleLocalRepoProblem urlrenderer - , maybe noop (handleRemoteProblem urlrenderer) - =<< liftAnnex (remoteFromUUID u) - ) - -handleRemoteProblem :: UrlRenderer -> Remote -> Assistant () -handleRemoteProblem urlrenderer rmt - | Git.repoIsLocal r && not (Git.repoIsLocalUnknown r) = - whenM (liftIO $ checkAvailable True rmt) $ do - repairStaleGitLocks r - fsckresults <- showFscking urlrenderer (Just $ Remote.name rmt) $ tryNonAsync $ - Git.Fsck.findBroken True r - whenM (repairWhenNecessary urlrenderer (Remote.uuid rmt) (Just rmt) fsckresults) $ - syncRemote rmt - | otherwise = noop - where - r = Remote.repo rmt - -handleLocalRepoProblem :: UrlRenderer -> Assistant () -handleLocalRepoProblem urlrenderer = do - repairStaleGitLocks =<< liftAnnex gitRepo diff --git a/Assistant/Threads/ProblemFixer.hs b/Assistant/Threads/ProblemFixer.hs new file mode 100644 index 0000000000..f9774e0f06 --- /dev/null +++ b/Assistant/Threads/ProblemFixer.hs @@ -0,0 +1,70 @@ +{- git-annex assistant thread to handle fixing problems with repositories + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Assistant.Threads.ProblemFixer ( + problemFixerThread +) where + +import Assistant.Common +import Assistant.Types.RepoProblem +import Assistant.RepoProblem +import Assistant.Types.UrlRenderer +import Assistant.Alert +import Remote +import qualified Types.Remote as Remote +import qualified Git.Fsck +import Assistant.Repair +import qualified Git +import Annex.UUID +import Utility.ThreadScheduler + +{- Waits for problems with a repo, and tries to fsck the repo and repair + - the problem. -} +problemFixerThread :: UrlRenderer -> NamedThread +problemFixerThread urlrenderer = namedThread "ProblemFixer" $ + go =<< getRepoProblems + where + go problems = do + mapM_ (handleProblem urlrenderer) problems + liftIO $ threadDelaySeconds (Seconds 60) + -- Problems may have been re-reported while they were being + -- fixed, so ignore those. If a new unique problem happened + -- 60 seconds after the last was fixed, we're unlikely + -- to do much good anyway. + go =<< filter (\p -> not (any (sameRepoProblem p) problems)) + <$> getRepoProblems + +handleProblem :: UrlRenderer -> RepoProblem -> Assistant () +handleProblem urlrenderer repoproblem = do + fixed <- ifM ((==) (problemUUID repoproblem) <$> liftAnnex getUUID) + ( handleLocalRepoProblem urlrenderer + , maybe (return False) (handleRemoteProblem urlrenderer) + =<< liftAnnex (remoteFromUUID $ problemUUID repoproblem) + ) + when fixed $ + liftIO $ afterFix repoproblem + +handleRemoteProblem :: UrlRenderer -> Remote -> Assistant Bool +handleRemoteProblem urlrenderer rmt + | Git.repoIsLocal r && not (Git.repoIsLocalUnknown r) = + ifM (liftIO $ checkAvailable True rmt) + ( do + fixedlocks <- repairStaleGitLocks r + fsckresults <- showFscking urlrenderer (Just $ Remote.name rmt) $ tryNonAsync $ + Git.Fsck.findBroken True r + repaired <- repairWhenNecessary urlrenderer (Remote.uuid rmt) (Just rmt) fsckresults + return $ fixedlocks || repaired + , return False + ) + | otherwise = return False + where + r = Remote.repo rmt + +{- This is not yet used, and should probably do a fsck. -} +handleLocalRepoProblem :: UrlRenderer -> Assistant Bool +handleLocalRepoProblem urlrenderer = do + repairStaleGitLocks =<< liftAnnex gitRepo diff --git a/Assistant/Threads/SanityChecker.hs b/Assistant/Threads/SanityChecker.hs index 0dd047fc9e..4f5eeda50c 100644 --- a/Assistant/Threads/SanityChecker.hs +++ b/Assistant/Threads/SanityChecker.hs @@ -33,7 +33,7 @@ import Data.Time.Clock.POSIX - being nonresponsive.) -} sanityCheckerStartupThread :: Maybe Duration -> NamedThread sanityCheckerStartupThread startupdelay = namedThreadUnchecked "SanityCheckerStartup" $ do - repairStaleGitLocks =<< liftAnnex gitRepo + void $ repairStaleGitLocks =<< liftAnnex gitRepo liftIO $ maybe noop (threadDelaySeconds . Seconds . fromIntegral . durationSeconds) startupdelay diff --git a/Assistant/Types/RepoProblem.hs b/Assistant/Types/RepoProblem.hs index 40397c7087..ece5a52868 100644 --- a/Assistant/Types/RepoProblem.hs +++ b/Assistant/Types/RepoProblem.hs @@ -1,4 +1,4 @@ -{- git-annex assistant remote problem detection +{- git-annex assistant repository problem tracking - - Copyright 2013 Joey Hess - @@ -11,8 +11,18 @@ import Types import Utility.TList import Control.Concurrent.STM +import Data.Function -type RepoProblemChan = TList UUID +data RepoProblem = RepoProblem + { problemUUID :: UUID + , afterFix :: IO () + } + +{- The afterFix actions are assumed to all be equivilant. -} +sameRepoProblem :: RepoProblem -> RepoProblem -> Bool +sameRepoProblem = (==) `on` problemUUID + +type RepoProblemChan = TList RepoProblem newRepoProblemChan :: IO RepoProblemChan newRepoProblemChan = atomically newTList diff --git a/doc/design/assistant/disaster_recovery.mdwn b/doc/design/assistant/disaster_recovery.mdwn index 4f72d96c34..40e48650e4 100644 --- a/doc/design/assistant/disaster_recovery.mdwn +++ b/doc/design/assistant/disaster_recovery.mdwn @@ -62,7 +62,10 @@ Add git fsck to scheduled self fsck **done** TODO: git fsck on ssh remotes? Probably not worth the complexity.. TODO: If committing to the repository fails, after resolving any dangling -lock files (see above), it should git fsck. +lock files (see above), it should git fsck. This is difficult, because +git commit will also fail if the commit turns out to be empty, or due to +other transient problems.. So commit failures are currently ignored by the +assistant. If git fsck finds problems, launch git repository repair. **done**