automatically launch git repository repair

Added a RemoteChecker thread, that waits for problems to be reported with
remotes, and checks if their git repository is in need of repair.

Currently, only failures to sync with the remote cause a problem to be
reported. This seems enough, but we'll see.

Plugging in a removable drive with a repository on it that is corrupted
does automatically repair the repository, as long as the corruption causes
git push or git pull to fail. Some types of corruption do not, eg
missing/corrupt objects for blobs that git push doesn't need to look at.

So, this is not really a replacement for scheduled git repository fscking.
But it does make the assistant more robust.

This commit is sponsored by Fernando Jimenez.
This commit is contained in:
Joey Hess 2013-10-27 16:42:13 -04:00
parent 3c08fee76b
commit a7821c0581
11 changed files with 129 additions and 39 deletions

View file

@ -23,6 +23,7 @@ import Assistant.Threads.TransferWatcher
import Assistant.Threads.Transferrer
import Assistant.Threads.SanityChecker
import Assistant.Threads.Cronner
import Assistant.Threads.RemoteChecker
#ifdef WITH_CLIBS
import Assistant.Threads.MountWatcher
#endif
@ -129,6 +130,7 @@ startDaemon assistant foreground startdelay listenhost startbrowser = do
, assist $ daemonStatusThread
, assist $ sanityCheckerDailyThread
, assist $ sanityCheckerHourlyThread
, assist $ remoteCheckerThread urlrenderer
#ifdef WITH_CLIBS
, assist $ mountWatcherThread
#endif

View file

@ -19,6 +19,7 @@ import Git.Remote (RemoteName)
import Data.String
import qualified Data.Text as T
import qualified Control.Exception as E
#ifdef WITH_WEBAPP
import Assistant.Monad
@ -174,6 +175,17 @@ fsckAlert button n = baseActivityAlert
, alertButton = Just button
}
showFscking :: UrlRenderer -> Maybe RemoteName -> IO (Either E.SomeException a) -> Assistant a
showFscking urlrenderer remotename a = do
#ifdef WITH_WEBAPP
button <- mkAlertButton False (T.pack "Configure") urlrenderer ConfigFsckR
r <- alertDuring (fsckAlert button remotename) $
liftIO a
either (liftIO . E.throwIO) return r
#else
a
#endif
brokenRepositoryAlert :: AlertButton -> Alert
brokenRepositoryAlert = errorAlert "Serious problems have been detected with your repository. This needs your immediate attention!"

View file

@ -39,6 +39,7 @@ import Assistant.Types.Pushes
import Assistant.Types.BranchChange
import Assistant.Types.Commits
import Assistant.Types.Changes
import Assistant.Types.RemoteProblem
import Assistant.Types.Buddies
import Assistant.Types.NetMessager
import Assistant.Types.ThreadName
@ -63,6 +64,7 @@ data AssistantData = AssistantData
, failedPushMap :: FailedPushMap
, commitChan :: CommitChan
, changePool :: ChangePool
, remoteProblemChan :: RemoteProblemChan
, branchChangeHandle :: BranchChangeHandle
, buddyList :: BuddyList
, netMessager :: NetMessager
@ -80,6 +82,7 @@ newAssistantData st dstatus = AssistantData
<*> newFailedPushMap
<*> newCommitChan
<*> newChangePool
<*> newRemoteProblemChan
<*> newBranchChangeHandle
<*> newBuddyList
<*> newNetMessager

View file

@ -0,0 +1,23 @@
{- git-annex assistant remote problem handling
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Assistant.RemoteProblem where
import Assistant.Common
import Utility.TList
import Control.Concurrent.STM
{- Gets all remotes that have problems.
- Blocks until there is at least one. -}
getRemoteProblems :: Assistant [Remote]
getRemoteProblems = (atomically . getTList) <<~ remoteProblemChan
{- Indicates that there was a problem accessing a remote, and the problem
- appears to not be a transient (eg network connection) problem. -}
remoteHasProblem :: Remote -> Assistant ()
remoteHasProblem r = (atomically . flip consTList r) <<~ remoteProblemChan

View file

@ -33,24 +33,21 @@ import Control.Concurrent.Async
{- When the FsckResults require a repair, tries to do a non-destructive
- repair. If that fails, pops up an alert. -}
repairWhenNecessary :: UrlRenderer -> UUID -> Maybe Remote -> FsckResults -> Assistant ()
repairWhenNecessary :: UrlRenderer -> UUID -> Maybe Remote -> FsckResults -> Assistant Bool
repairWhenNecessary urlrenderer u mrmt fsckresults
| foundBroken fsckresults = do
liftAnnex $ writeFsckResults u fsckresults
repodesc <- liftAnnex $ Remote.prettyUUID u
handle =<< alertDuring (repairingAlert repodesc)
ok <- alertWhile (repairingAlert repodesc)
(runRepair u mrmt False)
| otherwise = noop
where
handle True = return ()
handle False = do
#ifdef WITH_WEBAPP
button <- mkAlertButton True (T.pack "Click Here") urlrenderer $
RepairRepositoryR u
void $ addAlert $ brokenRepositoryAlert button
#else
return ()
unless ok $ do
button <- mkAlertButton True (T.pack "Click Here") urlrenderer $
RepairRepositoryR u
void $ addAlert $ brokenRepositoryAlert button
return ok
#endif
| otherwise = return False
runRepair :: UUID -> Maybe Remote -> Bool -> Assistant Bool
runRepair u mrmt destructiverepair = do

View file

@ -33,6 +33,7 @@ import Assistant.NamedThread
import Assistant.Threads.Watcher (watchThread, WatcherControl(..))
import Assistant.TransferSlots
import Assistant.TransferQueue
import Assistant.RemoteProblem
import Logs.Transfer
import Data.Time.Clock
@ -59,11 +60,14 @@ import Control.Concurrent
reconnectRemotes :: Bool -> [Remote] -> Assistant ()
reconnectRemotes _ [] = noop
reconnectRemotes notifypushes rs = void $ do
rs' <- filterM (checkavailable . Remote.repo) rs
rs' <- liftIO $ filterM (Remote.checkAvailable True) rs
unless (null rs') $ do
modifyDaemonStatus_ $ \s -> s
{ desynced = S.union (S.fromList $ map Remote.uuid xmppremotes) (desynced s) }
failedrs <- syncAction rs' (const go)
forM_ failedrs $ \r ->
whenM (liftIO $ Remote.checkAvailable False r) $
remoteHasProblem r
mapM_ signal $ filter (`notElem` failedrs) rs'
where
gitremotes = filter (notspecialremote . Remote.repo) rs
@ -90,10 +94,6 @@ reconnectRemotes notifypushes rs = void $ do
signal r = liftIO . mapM_ (flip tryPutMVar ())
=<< fromMaybe [] . M.lookup (Remote.uuid r) . connectRemoteNotifiers
<$> getDaemonStatus
checkavailable r
| Git.repoIsLocal r || Git.repoIsLocalUnknown r =
liftIO $ doesDirectoryExist $ Git.repoPath r
| otherwise = return True
{- Updates the local sync branch, then pushes it to all remotes, in
- parallel, along with the git-annex branch. This is the same

View file

@ -5,7 +5,7 @@
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE DeriveDataTypeable, CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
module Assistant.Threads.Cronner (
cronnerThread
@ -29,10 +29,6 @@ import Assistant.Types.UrlRenderer
import Assistant.Alert
import Remote
import qualified Types.Remote as Remote
#ifdef WITH_WEBAPP
import Assistant.WebApp.Types
#endif
import Git.Remote (RemoteName)
import qualified Git.Fsck
import Assistant.Repair
import qualified Git
@ -43,8 +39,6 @@ import Data.Time.LocalTime
import Data.Time.Clock
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Control.Exception as E
import qualified Data.Text as T
{- Loads schedules for this repository, and fires off one thread for each
- scheduled event that runs on this repository. Each thread sleeps until
@ -191,7 +185,7 @@ runActivity' urlrenderer (ScheduledSelfFsck _ d) = do
void $ batchCommand program (Param "fsck" : annexFsckParams d)
Git.Fsck.findBroken True g
u <- liftAnnex getUUID
repairWhenNecessary urlrenderer u Nothing fsckresults
void $ repairWhenNecessary urlrenderer u Nothing fsckresults
mapM_ reget =<< liftAnnex (dirKeys gitAnnexBadDir)
where
reget k = queueTransfers "fsck found bad file; redownloading" Next k Nothing Download
@ -220,18 +214,7 @@ runActivity' urlrenderer (ScheduledRemoteFsck u s d) = handle =<< liftAnnex (rem
if Git.repoIsLocal r && not (Git.repoIsLocalUnknown r)
then Just <$> Git.Fsck.findBroken True r
else pure Nothing
maybe noop (repairWhenNecessary urlrenderer u (Just rmt)) fsckresults
showFscking :: UrlRenderer -> Maybe RemoteName -> IO (Either E.SomeException a) -> Assistant a
showFscking urlrenderer remotename a = do
#ifdef WITH_WEBAPP
button <- mkAlertButton False (T.pack "Configure") urlrenderer ConfigFsckR
r <- alertDuring (fsckAlert button remotename) $
liftIO a
either (liftIO . E.throwIO) return r
#else
a
#endif
maybe noop (void . repairWhenNecessary urlrenderer u (Just rmt)) fsckresults
annexFsckParams :: Duration -> [CommandParam]
annexFsckParams d =

View file

@ -13,6 +13,7 @@ import Assistant.Pushes
import Assistant.DaemonStatus
import Assistant.Sync
import Utility.ThreadScheduler
import qualified Remote
import qualified Types.Remote as Remote
{- This thread retries pushes that failed before. -}
@ -42,7 +43,7 @@ pushThread = namedThread "Pusher" $ runEvery (Seconds 2) <~> do
- to avoid ugly messages when a removable drive is not attached.
-}
pushTargets :: Assistant [Remote]
pushTargets = liftIO . filterM available =<< candidates <$> getDaemonStatus
pushTargets = liftIO . filterM (Remote.checkAvailable True)
=<< candidates <$> getDaemonStatus
where
candidates = filter (not . Remote.readonly) . syncGitRemotes
available = maybe (return True) doesDirectoryExist . Remote.localpath

View file

@ -0,0 +1,46 @@
{- git-annex assistant remote checker thread
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Assistant.Threads.RemoteChecker (
remoteCheckerThread
) where
import Assistant.Common
import Utility.ThreadScheduler
import Assistant.Types.UrlRenderer
import Assistant.Alert
import Remote
import qualified Types.Remote as Remote
import qualified Git.Fsck
import Assistant.Repair
import qualified Git
import Assistant.RemoteProblem
import Assistant.Sync
import Data.Function
{- Waits for problems with remotes, and tries to fsck the remote and repair
- the problem. -}
remoteCheckerThread :: UrlRenderer -> NamedThread
remoteCheckerThread urlrenderer = namedThread "RemoteChecker" $ forever $ do
mapM_ (handleProblem urlrenderer)
=<< liftIO . filterM (checkAvailable True)
=<< nubremotes <$> getRemoteProblems
liftIO $ threadDelaySeconds (Seconds 60)
where
nubremotes = nubBy ((==) `on` Remote.uuid)
handleProblem :: UrlRenderer -> Remote -> Assistant ()
handleProblem urlrenderer rmt
| Git.repoIsLocal r && not (Git.repoIsLocalUnknown r) = do
fsckresults <- showFscking urlrenderer (Just $ Remote.name rmt) $ tryNonAsync $
Git.Fsck.findBroken True r
whenM (repairWhenNecessary urlrenderer (Remote.uuid rmt) (Just rmt) fsckresults) $
syncRemote rmt
| otherwise = noop
where
r = Remote.repo rmt

View file

@ -0,0 +1,18 @@
{- git-annex assistant remote problem detection
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Assistant.Types.RemoteProblem where
import Types
import Utility.TList
import Control.Concurrent.STM
type RemoteProblemChan = TList Remote
newRemoteProblemChan :: IO RemoteProblemChan
newRemoteProblemChan = atomically newTList

View file

@ -39,7 +39,8 @@ module Remote (
showTriedRemotes,
showLocations,
forceTrust,
logStatus
logStatus,
checkAvailable
) where
import qualified Data.Map as M
@ -274,3 +275,7 @@ byCost = map snd . sortBy (comparing fst) . M.toList . costmap
where
costmap = M.fromListWith (++) . map costpair
costpair r = (cost r, [r])
checkAvailable :: Bool -> Remote -> IO Bool
checkAvailable assumenetworkavailable =
maybe (return assumenetworkavailable) doesDirectoryExist . localpath