move code around and rename thread; no functional changes

This commit is contained in:
Joey Hess 2013-10-29 13:41:44 -04:00
parent d068ec79ff
commit fabb0c50b7
10 changed files with 130 additions and 122 deletions

View file

@ -23,7 +23,7 @@ import Assistant.Threads.TransferWatcher
import Assistant.Threads.Transferrer import Assistant.Threads.Transferrer
import Assistant.Threads.SanityChecker import Assistant.Threads.SanityChecker
import Assistant.Threads.Cronner import Assistant.Threads.Cronner
import Assistant.Threads.RemoteChecker import Assistant.Threads.ProblemChecker
#ifdef WITH_CLIBS #ifdef WITH_CLIBS
import Assistant.Threads.MountWatcher import Assistant.Threads.MountWatcher
#endif #endif
@ -130,7 +130,7 @@ startDaemon assistant foreground startdelay listenhost startbrowser = do
, assist $ daemonStatusThread , assist $ daemonStatusThread
, assist $ sanityCheckerDailyThread , assist $ sanityCheckerDailyThread
, assist $ sanityCheckerHourlyThread , assist $ sanityCheckerHourlyThread
, assist $ remoteCheckerThread urlrenderer , assist $ problemCheckerThread urlrenderer
#ifdef WITH_CLIBS #ifdef WITH_CLIBS
, assist $ mountWatcherThread , assist $ mountWatcherThread
#endif #endif

View file

@ -39,7 +39,7 @@ import Assistant.Types.Pushes
import Assistant.Types.BranchChange import Assistant.Types.BranchChange
import Assistant.Types.Commits import Assistant.Types.Commits
import Assistant.Types.Changes import Assistant.Types.Changes
import Assistant.Types.RemoteProblem import Assistant.Types.RepoProblem
import Assistant.Types.Buddies import Assistant.Types.Buddies
import Assistant.Types.NetMessager import Assistant.Types.NetMessager
import Assistant.Types.ThreadName import Assistant.Types.ThreadName
@ -64,7 +64,7 @@ data AssistantData = AssistantData
, failedPushMap :: FailedPushMap , failedPushMap :: FailedPushMap
, commitChan :: CommitChan , commitChan :: CommitChan
, changePool :: ChangePool , changePool :: ChangePool
, remoteProblemChan :: RemoteProblemChan , repoProblemChan :: RepoProblemChan
, branchChangeHandle :: BranchChangeHandle , branchChangeHandle :: BranchChangeHandle
, buddyList :: BuddyList , buddyList :: BuddyList
, netMessager :: NetMessager , netMessager :: NetMessager
@ -82,7 +82,7 @@ newAssistantData st dstatus = AssistantData
<*> newFailedPushMap <*> newFailedPushMap
<*> newCommitChan <*> newCommitChan
<*> newChangePool <*> newChangePool
<*> newRemoteProblemChan <*> newRepoProblemChan
<*> newBranchChangeHandle <*> newBranchChangeHandle
<*> newBuddyList <*> newBuddyList
<*> newNetMessager <*> newNetMessager

View file

@ -1,23 +0,0 @@
{- git-annex assistant remote problem handling
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Assistant.RemoteProblem where
import Assistant.Common
import Utility.TList
import Control.Concurrent.STM
{- Gets all remotes that have problems.
- Blocks until there is at least one. -}
getRemoteProblems :: Assistant [Remote]
getRemoteProblems = (atomically . getTList) <<~ remoteProblemChan
{- Indicates that there was a problem accessing a remote, and the problem
- appears to not be a transient (eg network connection) problem. -}
remoteHasProblem :: Remote -> Assistant ()
remoteHasProblem r = (atomically . flip consTList r) <<~ remoteProblemChan

View file

@ -28,6 +28,8 @@ import Assistant.Types.UrlRenderer
import Assistant.WebApp.Types import Assistant.WebApp.Types
import qualified Data.Text as T import qualified Data.Text as T
#endif #endif
import qualified Utility.Lsof as Lsof
import Utility.ThreadScheduler
import Control.Concurrent.Async import Control.Concurrent.Async
@ -105,3 +107,43 @@ runRepair u mrmt destructiverepair = do
backgroundfsck params = liftIO $ void $ async $ do backgroundfsck params = liftIO $ void $ async $ do
program <- readProgramFile program <- readProgramFile
batchCommand program (Param "fsck" : params) batchCommand program (Param "fsck" : params)
{- Detect when a git lock file exists and has no git process currently
- writing to it. This strongly suggests it is a stale lock file.
-
- However, this could be on a network filesystem. Which is not very safe
- anyway (the assistant relies on being able to check when files have
- no writers to know when to commit them). Just in case, when the lock
- file appears stale, we delay for one minute, and check its size. If
- the size changed, delay for another minute, and so on. This will at
- least work to detect is another machine is writing out a new index
- file, since git does so by writing the new content to index.lock.
-}
checkStaleGitLocks :: Assistant ()
checkStaleGitLocks = do
lockfiles <- filter (not . isInfixOf "gc.pid")
. filter (".lock" `isSuffixOf`)
<$> (liftIO . dirContentsRecursiveSkipping (== dropTrailingPathSeparator annexDir)
=<< liftAnnex (fromRepo Git.localGitDir))
checkStaleLocks lockfiles
checkStaleLocks :: [FilePath] -> Assistant ()
checkStaleLocks lockfiles = go =<< getsizes
where
getsize lf = catchMaybeIO $
(\s -> (lf, fileSize s)) <$> getFileStatus lf
getsizes = liftIO $ catMaybes <$> mapM getsize lockfiles
go [] = return ()
go l = ifM (liftIO $ null <$> Lsof.query ("--" : map fst l))
( do
waitforit "to check stale git lock file"
l' <- getsizes
if l' == l
then liftIO $ mapM_ nukeFile (map fst l)
else go l'
, do
waitforit "for git lock file writer"
go =<< getsizes
)
waitforit why = do
notice ["Waiting for 60 seconds", why]
liftIO $ threadDelaySeconds $ Seconds 60

23
Assistant/RepoProblem.hs Normal file
View file

@ -0,0 +1,23 @@
{- git-annex assistant remote problem handling
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Assistant.RepoProblem where
import Assistant.Common
import Utility.TList
import Control.Concurrent.STM
{- Gets all repositories that have problems.
- Blocks until there is at least one. -}
getRepoProblems :: Assistant [UUID]
getRepoProblems = (atomically . getTList) <<~ repoProblemChan
{- Indicates that there was a problem accessing a repo, and the problem
- appears to not be a transient (eg network connection) problem. -}
repoHasProblem :: UUID -> Assistant ()
repoHasProblem r = (atomically . flip consTList r) <<~ repoProblemChan

View file

@ -33,7 +33,7 @@ import Assistant.NamedThread
import Assistant.Threads.Watcher (watchThread, WatcherControl(..)) import Assistant.Threads.Watcher (watchThread, WatcherControl(..))
import Assistant.TransferSlots import Assistant.TransferSlots
import Assistant.TransferQueue import Assistant.TransferQueue
import Assistant.RemoteProblem import Assistant.RepoProblem
import Logs.Transfer import Logs.Transfer
import Data.Time.Clock import Data.Time.Clock
@ -67,7 +67,7 @@ reconnectRemotes notifypushes rs = void $ do
failedrs <- syncAction rs' (const go) failedrs <- syncAction rs' (const go)
forM_ failedrs $ \r -> forM_ failedrs $ \r ->
whenM (liftIO $ Remote.checkAvailable False r) $ whenM (liftIO $ Remote.checkAvailable False r) $
remoteHasProblem r repoHasProblem (Remote.uuid r)
mapM_ signal $ filter (`notElem` failedrs) rs' mapM_ signal $ filter (`notElem` failedrs) rs'
where where
gitremotes = filter (notspecialremote . Remote.repo) rs gitremotes = filter (notspecialremote . Remote.repo) rs

View file

@ -0,0 +1,53 @@
{- git-annex assistant thread to handle reported problems with repositories
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Assistant.Threads.ProblemChecker (
problemCheckerThread
) where
import Assistant.Common
import Utility.ThreadScheduler
import Assistant.Types.UrlRenderer
import Assistant.Alert
import Remote
import qualified Types.Remote as Remote
import qualified Git.Fsck
import Assistant.Repair
import qualified Git
import Assistant.RepoProblem
import Assistant.Sync
import Annex.UUID
{- Waits for problems with a repo, and tries to fsck the repo and repair
- the problem. -}
problemCheckerThread :: UrlRenderer -> NamedThread
problemCheckerThread urlrenderer = namedThread "ProblemChecker" $ forever $ do
mapM_ (handleProblem urlrenderer)
=<< nub <$> getRepoProblems
liftIO $ threadDelaySeconds (Seconds 60)
handleProblem :: UrlRenderer -> UUID -> Assistant ()
handleProblem urlrenderer u = ifM ((==) u <$> liftAnnex getUUID)
( handleLocalRepoProblem urlrenderer
, maybe noop (handleRemoteProblem urlrenderer)
=<< liftAnnex (remoteFromUUID u)
)
handleRemoteProblem :: UrlRenderer -> Remote -> Assistant ()
handleRemoteProblem urlrenderer rmt
| Git.repoIsLocal r && not (Git.repoIsLocalUnknown r) =
whenM (liftIO $ checkAvailable True rmt) $ do
fsckresults <- showFscking urlrenderer (Just $ Remote.name rmt) $ tryNonAsync $
Git.Fsck.findBroken True r
whenM (repairWhenNecessary urlrenderer (Remote.uuid rmt) (Just rmt) fsckresults) $
syncRemote rmt
| otherwise = noop
where
r = Remote.repo rmt
handleLocalRepoProblem :: UrlRenderer -> Assistant ()
handleLocalRepoProblem urlrenderer = error "TODO"

View file

@ -1,46 +0,0 @@
{- git-annex assistant remote checker thread
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Assistant.Threads.RemoteChecker (
remoteCheckerThread
) where
import Assistant.Common
import Utility.ThreadScheduler
import Assistant.Types.UrlRenderer
import Assistant.Alert
import Remote
import qualified Types.Remote as Remote
import qualified Git.Fsck
import Assistant.Repair
import qualified Git
import Assistant.RemoteProblem
import Assistant.Sync
import Data.Function
{- Waits for problems with remotes, and tries to fsck the remote and repair
- the problem. -}
remoteCheckerThread :: UrlRenderer -> NamedThread
remoteCheckerThread urlrenderer = namedThread "RemoteChecker" $ forever $ do
mapM_ (handleProblem urlrenderer)
=<< liftIO . filterM (checkAvailable True)
=<< nubremotes <$> getRemoteProblems
liftIO $ threadDelaySeconds (Seconds 60)
where
nubremotes = nubBy ((==) `on` Remote.uuid)
handleProblem :: UrlRenderer -> Remote -> Assistant ()
handleProblem urlrenderer rmt
| Git.repoIsLocal r && not (Git.repoIsLocalUnknown r) = do
fsckresults <- showFscking urlrenderer (Just $ Remote.name rmt) $ tryNonAsync $
Git.Fsck.findBroken True r
whenM (repairWhenNecessary urlrenderer (Remote.uuid rmt) (Just rmt) fsckresults) $
syncRemote rmt
| otherwise = noop
where
r = Remote.repo rmt

View file

@ -14,6 +14,7 @@ module Assistant.Threads.SanityChecker (
import Assistant.Common import Assistant.Common
import Assistant.DaemonStatus import Assistant.DaemonStatus
import Assistant.Alert import Assistant.Alert
import Assistant.Repair
import qualified Git.LsFiles import qualified Git.LsFiles
import qualified Git.Command import qualified Git.Command
import qualified Git.Config import qualified Git.Config
@ -23,8 +24,6 @@ import Utility.LogFile
import Utility.Batch import Utility.Batch
import Utility.NotificationBroadcaster import Utility.NotificationBroadcaster
import Config import Config
import qualified Git
import qualified Utility.Lsof as Lsof
import Utility.HumanTime import Utility.HumanTime
import Data.Time.Clock.POSIX import Data.Time.Clock.POSIX
@ -146,46 +145,6 @@ checkLogSize n = do
where where
filesize f = fromIntegral . fileSize <$> liftIO (getFileStatus f) filesize f = fromIntegral . fileSize <$> liftIO (getFileStatus f)
{- Detect when a git lock file exists and has no git process currently
- writing to it. This strongly suggests it is a stale lock file.
-
- However, this could be on a network filesystem. Which is not very safe
- anyway (the assistant relies on being able to check when files have
- no writers to know when to commit them). Just in case, when the lock
- file appears stale, we delay for one minute, and check its size. If
- the size changed, delay for another minute, and so on. This will at
- least work to detect is another machine is writing out a new index
- file, since git does so by writing the new content to index.lock.
-}
checkStaleGitLocks :: Assistant ()
checkStaleGitLocks = do
lockfiles <- filter (not . isInfixOf "gc.pid")
. filter (".lock" `isSuffixOf`)
<$> (liftIO . dirContentsRecursiveSkipping (== dropTrailingPathSeparator annexDir)
=<< liftAnnex (fromRepo Git.localGitDir))
checkStaleLocks lockfiles
checkStaleLocks :: [FilePath] -> Assistant ()
checkStaleLocks lockfiles = go =<< getsizes
where
getsize lf = catchMaybeIO $
(\s -> (lf, fileSize s)) <$> getFileStatus lf
getsizes = liftIO $ catMaybes <$> mapM getsize lockfiles
go [] = return ()
go l = ifM (liftIO $ null <$> Lsof.query ("--" : map fst l))
( do
waitforit "to check stale git lock file"
l' <- getsizes
if l' == l
then liftIO $ mapM_ nukeFile (map fst l)
else go l'
, do
waitforit "for git lock file writer"
go =<< getsizes
)
waitforit why = do
notice ["Waiting for 60 seconds", why]
liftIO $ threadDelaySeconds $ Seconds 60
oneMegabyte :: Int oneMegabyte :: Int
oneMegabyte = 1000000 oneMegabyte = 1000000

View file

@ -5,14 +5,14 @@
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
module Assistant.Types.RemoteProblem where module Assistant.Types.RepoProblem where
import Types import Types
import Utility.TList import Utility.TList
import Control.Concurrent.STM import Control.Concurrent.STM
type RemoteProblemChan = TList Remote type RepoProblemChan = TList UUID
newRemoteProblemChan :: IO RemoteProblemChan newRepoProblemChan :: IO RepoProblemChan
newRemoteProblemChan = atomically newTList newRepoProblemChan = atomically newTList