add post-repair actions

This commit is contained in:
Joey Hess 2013-10-29 14:22:56 -04:00
parent 791c8535b5
commit 496c8b7abb
9 changed files with 113 additions and 71 deletions

View file

@ -118,13 +118,16 @@ runRepair u mrmt destructiverepair = do
- the size changed, delay for another minute, and so on. This will at
- least work to detect is another machine is writing out a new index
- file, since git does so by writing the new content to index.lock.
-
- Returns true if locks were cleaned up.
-}
repairStaleGitLocks :: Git.Repo -> Assistant ()
repairStaleGitLocks :: Git.Repo -> Assistant Bool
repairStaleGitLocks r = do
lockfiles <- filter (not . isInfixOf "gc.pid")
. filter (".lock" `isSuffixOf`)
<$> liftIO (findgitfiles r)
repairStaleLocks lockfiles
return $ not $ null lockfiles
where
findgitfiles = dirContentsRecursiveSkipping (== dropTrailingPathSeparator annexDir) . Git.localGitDir
repairStaleLocks :: [FilePath] -> Assistant ()

View file

@ -8,16 +8,27 @@
module Assistant.RepoProblem where
import Assistant.Common
import Assistant.Types.RepoProblem
import Utility.TList
import Control.Concurrent.STM
{- Gets all repositories that have problems.
- Blocks until there is at least one. -}
getRepoProblems :: Assistant [UUID]
getRepoProblems = (atomically . getTList) <<~ repoProblemChan
{- Gets all repositories that have problems. Blocks until there is at
- least one. -}
getRepoProblems :: Assistant [RepoProblem]
getRepoProblems = nubBy sameRepoProblem
<$> (atomically . getTList) <<~ repoProblemChan
{- Indicates that there was a problem accessing a repo, and the problem
- appears to not be a transient (eg network connection) problem. -}
repoHasProblem :: UUID -> Assistant ()
repoHasProblem r = (atomically . flip consTList r) <<~ repoProblemChan
{- Indicates that there was a problem with a repository, and the problem
- appears to not be a transient (eg network connection) problem.
-
- If the problem is able to be repaired, the passed action will be run.
- (However, if multiple problems are reported with a single repository,
- only a single action will be run.)
-}
repoHasProblem :: UUID -> Assistant () -> Assistant ()
repoHasProblem u afterrepair = do
rp <- RepoProblem
<$> pure u
<*> asIO afterrepair
(atomically . flip consTList rp) <<~ repoProblemChan

View file

@ -67,7 +67,7 @@ reconnectRemotes notifypushes rs = void $ do
failedrs <- syncAction rs' (const go)
forM_ failedrs $ \r ->
whenM (liftIO $ Remote.checkAvailable False r) $
repoHasProblem (Remote.uuid r)
repoHasProblem (Remote.uuid r) (syncRemote r)
mapM_ signal $ filter (`notElem` failedrs) rs'
where
gitremotes = filter (notspecialremote . Remote.repo) rs

View file

@ -1,55 +0,0 @@
{- git-annex assistant thread to handle reported problems with repositories
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Assistant.Threads.ProblemChecker (
problemCheckerThread
) where
import Assistant.Common
import Utility.ThreadScheduler
import Assistant.Types.UrlRenderer
import Assistant.Alert
import Remote
import qualified Types.Remote as Remote
import qualified Git.Fsck
import Assistant.Repair
import qualified Git
import Assistant.RepoProblem
import Assistant.Sync
import Annex.UUID
{- Waits for problems with a repo, and tries to fsck the repo and repair
- the problem. -}
problemCheckerThread :: UrlRenderer -> NamedThread
problemCheckerThread urlrenderer = namedThread "ProblemChecker" $ forever $ do
mapM_ (handleProblem urlrenderer)
=<< nub <$> getRepoProblems
liftIO $ threadDelaySeconds (Seconds 60)
handleProblem :: UrlRenderer -> UUID -> Assistant ()
handleProblem urlrenderer u = ifM ((==) u <$> liftAnnex getUUID)
( handleLocalRepoProblem urlrenderer
, maybe noop (handleRemoteProblem urlrenderer)
=<< liftAnnex (remoteFromUUID u)
)
handleRemoteProblem :: UrlRenderer -> Remote -> Assistant ()
handleRemoteProblem urlrenderer rmt
| Git.repoIsLocal r && not (Git.repoIsLocalUnknown r) =
whenM (liftIO $ checkAvailable True rmt) $ do
repairStaleGitLocks r
fsckresults <- showFscking urlrenderer (Just $ Remote.name rmt) $ tryNonAsync $
Git.Fsck.findBroken True r
whenM (repairWhenNecessary urlrenderer (Remote.uuid rmt) (Just rmt) fsckresults) $
syncRemote rmt
| otherwise = noop
where
r = Remote.repo rmt
handleLocalRepoProblem :: UrlRenderer -> Assistant ()
handleLocalRepoProblem urlrenderer = do
repairStaleGitLocks =<< liftAnnex gitRepo

View file

@ -0,0 +1,70 @@
{- git-annex assistant thread to handle fixing problems with repositories
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Assistant.Threads.ProblemFixer (
problemFixerThread
) where
import Assistant.Common
import Assistant.Types.RepoProblem
import Assistant.RepoProblem
import Assistant.Types.UrlRenderer
import Assistant.Alert
import Remote
import qualified Types.Remote as Remote
import qualified Git.Fsck
import Assistant.Repair
import qualified Git
import Annex.UUID
import Utility.ThreadScheduler
{- Waits for problems with a repo, and tries to fsck the repo and repair
- the problem. -}
problemFixerThread :: UrlRenderer -> NamedThread
problemFixerThread urlrenderer = namedThread "ProblemFixer" $
go =<< getRepoProblems
where
go problems = do
mapM_ (handleProblem urlrenderer) problems
liftIO $ threadDelaySeconds (Seconds 60)
-- Problems may have been re-reported while they were being
-- fixed, so ignore those. If a new unique problem happened
-- 60 seconds after the last was fixed, we're unlikely
-- to do much good anyway.
go =<< filter (\p -> not (any (sameRepoProblem p) problems))
<$> getRepoProblems
handleProblem :: UrlRenderer -> RepoProblem -> Assistant ()
handleProblem urlrenderer repoproblem = do
fixed <- ifM ((==) (problemUUID repoproblem) <$> liftAnnex getUUID)
( handleLocalRepoProblem urlrenderer
, maybe (return False) (handleRemoteProblem urlrenderer)
=<< liftAnnex (remoteFromUUID $ problemUUID repoproblem)
)
when fixed $
liftIO $ afterFix repoproblem
handleRemoteProblem :: UrlRenderer -> Remote -> Assistant Bool
handleRemoteProblem urlrenderer rmt
| Git.repoIsLocal r && not (Git.repoIsLocalUnknown r) =
ifM (liftIO $ checkAvailable True rmt)
( do
fixedlocks <- repairStaleGitLocks r
fsckresults <- showFscking urlrenderer (Just $ Remote.name rmt) $ tryNonAsync $
Git.Fsck.findBroken True r
repaired <- repairWhenNecessary urlrenderer (Remote.uuid rmt) (Just rmt) fsckresults
return $ fixedlocks || repaired
, return False
)
| otherwise = return False
where
r = Remote.repo rmt
{- This is not yet used, and should probably do a fsck. -}
handleLocalRepoProblem :: UrlRenderer -> Assistant Bool
handleLocalRepoProblem urlrenderer = do
repairStaleGitLocks =<< liftAnnex gitRepo

View file

@ -33,7 +33,7 @@ import Data.Time.Clock.POSIX
- being nonresponsive.) -}
sanityCheckerStartupThread :: Maybe Duration -> NamedThread
sanityCheckerStartupThread startupdelay = namedThreadUnchecked "SanityCheckerStartup" $ do
repairStaleGitLocks =<< liftAnnex gitRepo
void $ repairStaleGitLocks =<< liftAnnex gitRepo
liftIO $ maybe noop (threadDelaySeconds . Seconds . fromIntegral . durationSeconds) startupdelay

View file

@ -1,4 +1,4 @@
{- git-annex assistant remote problem detection
{- git-annex assistant repository problem tracking
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
@ -11,8 +11,18 @@ import Types
import Utility.TList
import Control.Concurrent.STM
import Data.Function
type RepoProblemChan = TList UUID
data RepoProblem = RepoProblem
{ problemUUID :: UUID
, afterFix :: IO ()
}
{- The afterFix actions are assumed to all be equivilant. -}
sameRepoProblem :: RepoProblem -> RepoProblem -> Bool
sameRepoProblem = (==) `on` problemUUID
type RepoProblemChan = TList RepoProblem
newRepoProblemChan :: IO RepoProblemChan
newRepoProblemChan = atomically newTList