assistant: Detect stale git lock files at startup time, and remove them.
Extends the index.lock handling to other git lock files. I surveyed all lock files used by git, and found more than I expected. All are handled the same in git; it leaves them open while doing the operation, possibly writing the new file content to the lock file, and then closes them when done. The gc.pid file is excluded because it won't affect the normal operation of the assistant, and waiting for a gc to finish on startup wouldn't be good. All threads except the webapp thread wait on the new startup sanity checker thread to complete, so they won't try to do things with git that fail due to stale lock files. The webapp thread mostly avoids doing that kind of thing itself. A few configurators might fail on lock files, but only if the user is explicitly trying to run them. The webapp needs to start immediately when the user has opened it, even if there are stale lock files. Arranging for the threads to wait on the startup sanity checker was a bit of a bear. Have to get all the NotificationHandles set up before the startup sanity checker runs, or they won't see its signal. Perhaps the NotificationBroadcaster is not the best interface to have used for this. Oh well, it works. This commit was sponsored by Michael Jakl
This commit is contained in:
parent
370dfd141b
commit
635c9a1549
10 changed files with 101 additions and 51 deletions
|
@ -136,6 +136,9 @@ startDaemon assistant foreground listenhost startbrowser = do
|
||||||
, assist $ configMonitorThread
|
, assist $ configMonitorThread
|
||||||
, assist $ glacierThread
|
, assist $ glacierThread
|
||||||
, watch $ watchThread
|
, watch $ watchThread
|
||||||
|
-- must come last so that all threads that wait
|
||||||
|
-- on it have already started waiting
|
||||||
|
, watch $ sanityCheckerStartupThread
|
||||||
]
|
]
|
||||||
|
|
||||||
liftIO waitForTermination
|
liftIO waitForTermination
|
||||||
|
|
|
@ -16,6 +16,7 @@ import Assistant.Types.DaemonStatus
|
||||||
import Assistant.Types.UrlRenderer
|
import Assistant.Types.UrlRenderer
|
||||||
import Assistant.DaemonStatus
|
import Assistant.DaemonStatus
|
||||||
import Assistant.Monad
|
import Assistant.Monad
|
||||||
|
import Utility.NotificationBroadcaster
|
||||||
|
|
||||||
import Control.Concurrent
|
import Control.Concurrent
|
||||||
import Control.Concurrent.Async
|
import Control.Concurrent.Async
|
||||||
|
@ -34,7 +35,7 @@ import qualified Data.Text as T
|
||||||
- Named threads are run by a management thread, so if they crash
|
- Named threads are run by a management thread, so if they crash
|
||||||
- an alert is displayed, allowing the thread to be restarted. -}
|
- an alert is displayed, allowing the thread to be restarted. -}
|
||||||
startNamedThread :: UrlRenderer -> NamedThread -> Assistant ()
|
startNamedThread :: UrlRenderer -> NamedThread -> Assistant ()
|
||||||
startNamedThread urlrenderer namedthread@(NamedThread name a) = do
|
startNamedThread urlrenderer (NamedThread afterstartupsanitycheck name a) = do
|
||||||
m <- startedThreads <$> getDaemonStatus
|
m <- startedThreads <$> getDaemonStatus
|
||||||
case M.lookup name m of
|
case M.lookup name m of
|
||||||
Nothing -> start
|
Nothing -> start
|
||||||
|
@ -44,14 +45,24 @@ startNamedThread urlrenderer namedthread@(NamedThread name a) = do
|
||||||
Right Nothing -> noop
|
Right Nothing -> noop
|
||||||
_ -> start
|
_ -> start
|
||||||
where
|
where
|
||||||
start = do
|
start
|
||||||
|
| afterstartupsanitycheck = do
|
||||||
|
status <- getDaemonStatus
|
||||||
|
h <- liftIO $ newNotificationHandle False $
|
||||||
|
startupSanityCheckNotifier status
|
||||||
|
startwith $ runmanaged $
|
||||||
|
liftIO $ waitNotification h
|
||||||
|
| otherwise = startwith $ runmanaged noop
|
||||||
|
startwith runner = do
|
||||||
d <- getAssistant id
|
d <- getAssistant id
|
||||||
aid <- liftIO $ runmanaged $ d { threadName = name }
|
aid <- liftIO $ runner $ d { threadName = name }
|
||||||
restart <- asIO $ startNamedThread urlrenderer namedthread
|
restart <- asIO $ startNamedThread urlrenderer (NamedThread False name a)
|
||||||
modifyDaemonStatus_ $ \s -> s
|
modifyDaemonStatus_ $ \s -> s
|
||||||
{ startedThreads = M.insertWith' const name (aid, restart) (startedThreads s) }
|
{ startedThreads = M.insertWith' const name (aid, restart) (startedThreads s) }
|
||||||
runmanaged d = do
|
runmanaged first d = do
|
||||||
aid <- async $ runAssistant d a
|
aid <- async $ runAssistant d $ do
|
||||||
|
void first
|
||||||
|
a
|
||||||
void $ forkIO $ manager d aid
|
void $ forkIO $ manager d aid
|
||||||
return aid
|
return aid
|
||||||
manager d aid = do
|
manager d aid = do
|
||||||
|
@ -75,7 +86,7 @@ startNamedThread urlrenderer namedthread@(NamedThread name a) = do
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namedThreadId :: NamedThread -> Assistant (Maybe ThreadId)
|
namedThreadId :: NamedThread -> Assistant (Maybe ThreadId)
|
||||||
namedThreadId (NamedThread name _) = do
|
namedThreadId (NamedThread _ name _) = do
|
||||||
m <- startedThreads <$> getDaemonStatus
|
m <- startedThreads <$> getDaemonStatus
|
||||||
return $ asyncThreadId . fst <$> M.lookup name m
|
return $ asyncThreadId . fst <$> M.lookup name m
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
{- git-annex assistant sanity checker
|
{- git-annex assistant sanity checker
|
||||||
-
|
-
|
||||||
- Copyright 2012 Joey Hess <joey@kitenet.net>
|
- Copyright 2012, 2013 Joey Hess <joey@kitenet.net>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
|
||||||
module Assistant.Threads.SanityChecker (
|
module Assistant.Threads.SanityChecker (
|
||||||
|
sanityCheckerStartupThread,
|
||||||
sanityCheckerDailyThread,
|
sanityCheckerDailyThread,
|
||||||
sanityCheckerHourlyThread
|
sanityCheckerHourlyThread
|
||||||
) where
|
) where
|
||||||
|
@ -20,10 +21,20 @@ import Utility.ThreadScheduler
|
||||||
import qualified Assistant.Threads.Watcher as Watcher
|
import qualified Assistant.Threads.Watcher as Watcher
|
||||||
import Utility.LogFile
|
import Utility.LogFile
|
||||||
import Utility.Batch
|
import Utility.Batch
|
||||||
|
import Utility.NotificationBroadcaster
|
||||||
import Config
|
import Config
|
||||||
|
import qualified Git
|
||||||
|
import qualified Utility.Lsof as Lsof
|
||||||
|
|
||||||
import Data.Time.Clock.POSIX
|
import Data.Time.Clock.POSIX
|
||||||
|
|
||||||
|
{- This thread runs once at startup, and most other threads wait for it
|
||||||
|
- to finish. (However, the webapp thread does not, to prevent the UI
|
||||||
|
- being nonresponsive.) -}
|
||||||
|
sanityCheckerStartupThread :: NamedThread
|
||||||
|
sanityCheckerStartupThread = namedThreadUnchecked "SanityCheckerStartup" $
|
||||||
|
startupCheck
|
||||||
|
|
||||||
{- This thread wakes up hourly for inxepensive frequent sanity checks. -}
|
{- This thread wakes up hourly for inxepensive frequent sanity checks. -}
|
||||||
sanityCheckerHourlyThread :: NamedThread
|
sanityCheckerHourlyThread :: NamedThread
|
||||||
sanityCheckerHourlyThread = namedThread "SanityCheckerHourly" $ forever $ do
|
sanityCheckerHourlyThread = namedThread "SanityCheckerHourly" $ forever $ do
|
||||||
|
@ -69,6 +80,14 @@ waitForNextCheck = do
|
||||||
oneDay - truncate (now - lastcheck)
|
oneDay - truncate (now - lastcheck)
|
||||||
| otherwise = oneDay
|
| otherwise = oneDay
|
||||||
|
|
||||||
|
startupCheck :: Assistant ()
|
||||||
|
startupCheck = do
|
||||||
|
checkStaleGitLocks
|
||||||
|
|
||||||
|
{- Notify other threads that the startup sanity check is done. -}
|
||||||
|
status <- getDaemonStatus
|
||||||
|
liftIO $ sendNotification $ startupSanityCheckNotifier status
|
||||||
|
|
||||||
{- It's important to stay out of the Annex monad as much as possible while
|
{- It's important to stay out of the Annex monad as much as possible while
|
||||||
- running potentially expensive parts of this check, since remaining in it
|
- running potentially expensive parts of this check, since remaining in it
|
||||||
- will block the watcher. -}
|
- will block the watcher. -}
|
||||||
|
@ -128,6 +147,46 @@ checkLogSize n = do
|
||||||
where
|
where
|
||||||
filesize f = fromIntegral . fileSize <$> liftIO (getFileStatus f)
|
filesize f = fromIntegral . fileSize <$> liftIO (getFileStatus f)
|
||||||
|
|
||||||
|
{- Detect when a git lock file exists and has no git process currently
|
||||||
|
- writing to it. This strongly suggests it is a stale lock file.
|
||||||
|
-
|
||||||
|
- However, this could be on a network filesystem. Which is not very safe
|
||||||
|
- anyway (the assistant relies on being able to check when files have
|
||||||
|
- no writers to know when to commit them). Just in case, when the lock
|
||||||
|
- file appears stale, we delay for one minute, and check its size. If
|
||||||
|
- the size changed, delay for another minute, and so on. This will at
|
||||||
|
- least work to detect is another machine is writing out a new index
|
||||||
|
- file, since git does so by writing the new content to index.lock.
|
||||||
|
-}
|
||||||
|
checkStaleGitLocks :: Assistant ()
|
||||||
|
checkStaleGitLocks = do
|
||||||
|
lockfiles <- filter (not . isInfixOf "gc.pid")
|
||||||
|
. filter (".lock" `isSuffixOf`)
|
||||||
|
<$> (liftIO . dirContentsRecursiveSkipping (annexDir `isInfixOf`)
|
||||||
|
=<< liftAnnex (fromRepo Git.localGitDir))
|
||||||
|
checkStaleLocks lockfiles
|
||||||
|
checkStaleLocks :: [FilePath] -> Assistant ()
|
||||||
|
checkStaleLocks lockfiles = go =<< getsizes
|
||||||
|
where
|
||||||
|
getsize lf = catchMaybeIO $
|
||||||
|
(\s -> (lf, fileSize s)) <$> getFileStatus lf
|
||||||
|
getsizes = liftIO $ catMaybes <$> mapM getsize lockfiles
|
||||||
|
go [] = return ()
|
||||||
|
go l = ifM (liftIO $ null <$> Lsof.query ("--" : map fst l))
|
||||||
|
( do
|
||||||
|
waitforit "to check stale git lock file"
|
||||||
|
l' <- getsizes
|
||||||
|
if l' == l
|
||||||
|
then liftIO $ mapM_ nukeFile (map fst l)
|
||||||
|
else go l'
|
||||||
|
, do
|
||||||
|
waitforit "for git lock file writer"
|
||||||
|
go =<< getsizes
|
||||||
|
)
|
||||||
|
waitforit why = do
|
||||||
|
notice ["Waiting for 60 seconds", why]
|
||||||
|
liftIO $ threadDelaySeconds $ Seconds 60
|
||||||
|
|
||||||
oneMegabyte :: Int
|
oneMegabyte :: Int
|
||||||
oneMegabyte = 1000000
|
oneMegabyte = 1000000
|
||||||
|
|
||||||
|
@ -136,3 +195,4 @@ oneHour = 60 * 60
|
||||||
|
|
||||||
oneDay :: Int
|
oneDay :: Int
|
||||||
oneDay = 24 * oneHour
|
oneDay = 24 * oneHour
|
||||||
|
|
||||||
|
|
|
@ -122,7 +122,6 @@ waitFor sig next = do
|
||||||
{- Initial scartup scan. The action should return once the scan is complete. -}
|
{- Initial scartup scan. The action should return once the scan is complete. -}
|
||||||
startupScan :: IO a -> Assistant a
|
startupScan :: IO a -> Assistant a
|
||||||
startupScan scanner = do
|
startupScan scanner = do
|
||||||
checkStaleIndexLock
|
|
||||||
liftAnnex $ showAction "scanning"
|
liftAnnex $ showAction "scanning"
|
||||||
alertWhile' startupScanAlert $ do
|
alertWhile' startupScanAlert $ do
|
||||||
r <- liftIO scanner
|
r <- liftIO scanner
|
||||||
|
@ -143,40 +142,6 @@ startupScan scanner = do
|
||||||
|
|
||||||
return (True, r)
|
return (True, r)
|
||||||
|
|
||||||
{- Detect when .git/index.lock exists and has no git process currently
|
|
||||||
- writing to it. This strongly suggests it is a stale lock file, because
|
|
||||||
- git writes the new index to index.lock and renames it over top.
|
|
||||||
-
|
|
||||||
- However, this could be on a network filesystem. Which is not very safe
|
|
||||||
- anyway (the assistant relies on being able to check when files have
|
|
||||||
- no writers to know when to commit them). Just in case, when the file
|
|
||||||
- appears stale, we delay for one minute, and check its size. If the size
|
|
||||||
- changed, delay for another minute, and so on.
|
|
||||||
-}
|
|
||||||
checkStaleIndexLock :: Assistant ()
|
|
||||||
checkStaleIndexLock = do
|
|
||||||
dir <- liftAnnex $ fromRepo Git.localGitDir
|
|
||||||
checkStale $ dir </> "index.lock"
|
|
||||||
checkStale :: FilePath -> Assistant ()
|
|
||||||
checkStale indexlock = go =<< getsize
|
|
||||||
where
|
|
||||||
getsize = liftIO $ catchMaybeIO $ fileSize <$> getFileStatus indexlock
|
|
||||||
go Nothing = return ()
|
|
||||||
go oldsize = ifM (liftIO $ null <$> Lsof.query ["--", indexlock])
|
|
||||||
( do
|
|
||||||
waitforit "to check stale"
|
|
||||||
size <- getsize
|
|
||||||
if size == oldsize
|
|
||||||
then liftIO $ nukeFile indexlock
|
|
||||||
else go size
|
|
||||||
, do
|
|
||||||
waitforit "for writer on"
|
|
||||||
go =<< getsize
|
|
||||||
)
|
|
||||||
waitforit why = do
|
|
||||||
notice ["Waiting for 60 seconds", why, indexlock]
|
|
||||||
liftIO $ threadDelaySeconds $ Seconds 60
|
|
||||||
|
|
||||||
{- Hardcoded ignores, passed to the DirWatcher so it can avoid looking
|
{- Hardcoded ignores, passed to the DirWatcher so it can avoid looking
|
||||||
- at the entire .git directory. Does not include .gitignores. -}
|
- at the entire .git directory. Does not include .gitignores. -}
|
||||||
ignored :: FilePath -> Bool
|
ignored :: FilePath -> Bool
|
||||||
|
|
|
@ -83,7 +83,10 @@ webAppThread assistantdata urlrenderer noannex listenhost postfirstrun onstartup
|
||||||
urlfile <- runThreadState st $ fromRepo gitAnnexUrlFile
|
urlfile <- runThreadState st $ fromRepo gitAnnexUrlFile
|
||||||
go addr webapp htmlshim (Just urlfile)
|
go addr webapp htmlshim (Just urlfile)
|
||||||
where
|
where
|
||||||
thread = namedThread "WebApp"
|
-- The webapp thread does not wait for the startupSanityCheckThread
|
||||||
|
-- to finish, so that the user interface remains responsive while
|
||||||
|
-- that's going on.
|
||||||
|
thread = namedThreadUnchecked "WebApp"
|
||||||
getreldir
|
getreldir
|
||||||
| noannex = return Nothing
|
| noannex = return Nothing
|
||||||
| otherwise = Just <$>
|
| otherwise = Just <$>
|
||||||
|
|
|
@ -31,9 +31,9 @@ data DaemonStatus = DaemonStatus
|
||||||
, scanComplete :: Bool
|
, scanComplete :: Bool
|
||||||
-- Time when a previous process of the daemon was running ok
|
-- Time when a previous process of the daemon was running ok
|
||||||
, lastRunning :: Maybe POSIXTime
|
, lastRunning :: Maybe POSIXTime
|
||||||
-- True when the sanity checker is running
|
-- True when the daily sanity checker is running
|
||||||
, sanityCheckRunning :: Bool
|
, sanityCheckRunning :: Bool
|
||||||
-- Last time the sanity checker ran
|
-- Last time the daily sanity checker ran
|
||||||
, lastSanityCheck :: Maybe POSIXTime
|
, lastSanityCheck :: Maybe POSIXTime
|
||||||
-- True when a scan for file transfers is running
|
-- True when a scan for file transfers is running
|
||||||
, transferScanRunning :: Bool
|
, transferScanRunning :: Bool
|
||||||
|
@ -62,6 +62,7 @@ data DaemonStatus = DaemonStatus
|
||||||
, alertNotifier :: NotificationBroadcaster
|
, alertNotifier :: NotificationBroadcaster
|
||||||
-- Broadcasts notifications when the syncRemotes change
|
-- Broadcasts notifications when the syncRemotes change
|
||||||
, syncRemotesNotifier :: NotificationBroadcaster
|
, syncRemotesNotifier :: NotificationBroadcaster
|
||||||
|
, startupSanityCheckNotifier :: NotificationBroadcaster
|
||||||
-- When the XMPP client is connected, this will contain the XMPP
|
-- When the XMPP client is connected, this will contain the XMPP
|
||||||
-- address.
|
-- address.
|
||||||
, xmppClientID :: Maybe ClientID
|
, xmppClientID :: Maybe ClientID
|
||||||
|
@ -93,4 +94,5 @@ newDaemonStatus = DaemonStatus
|
||||||
<*> newNotificationBroadcaster
|
<*> newNotificationBroadcaster
|
||||||
<*> newNotificationBroadcaster
|
<*> newNotificationBroadcaster
|
||||||
<*> newNotificationBroadcaster
|
<*> newNotificationBroadcaster
|
||||||
|
<*> newNotificationBroadcaster
|
||||||
<*> pure Nothing
|
<*> pure Nothing
|
||||||
|
|
|
@ -11,7 +11,11 @@ import Assistant.Monad
|
||||||
import Assistant.Types.ThreadName
|
import Assistant.Types.ThreadName
|
||||||
|
|
||||||
{- Information about a named thread that can be run. -}
|
{- Information about a named thread that can be run. -}
|
||||||
data NamedThread = NamedThread ThreadName (Assistant ())
|
data NamedThread = NamedThread Bool ThreadName (Assistant ())
|
||||||
|
|
||||||
namedThread :: String -> Assistant () -> NamedThread
|
namedThread :: String -> Assistant () -> NamedThread
|
||||||
namedThread = NamedThread . ThreadName
|
namedThread = NamedThread True . ThreadName
|
||||||
|
|
||||||
|
{- A named thread that can start running before the startup sanity check. -}
|
||||||
|
namedThreadUnchecked :: String -> Assistant () -> NamedThread
|
||||||
|
namedThreadUnchecked = NamedThread False . ThreadName
|
||||||
|
|
|
@ -10,6 +10,7 @@ module Locations (
|
||||||
fileKey,
|
fileKey,
|
||||||
keyPaths,
|
keyPaths,
|
||||||
keyPath,
|
keyPath,
|
||||||
|
annexDir,
|
||||||
objectDir,
|
objectDir,
|
||||||
gitAnnexLocation,
|
gitAnnexLocation,
|
||||||
gitAnnexLink,
|
gitAnnexLink,
|
||||||
|
|
4
debian/changelog
vendored
4
debian/changelog
vendored
|
@ -3,9 +3,7 @@ git-annex (4.20131003) UNRELEASED; urgency=low
|
||||||
* Automatically and safely detect and recover from dangling
|
* Automatically and safely detect and recover from dangling
|
||||||
.git/annex/index.lock files, which would prevent git from
|
.git/annex/index.lock files, which would prevent git from
|
||||||
committing to the git-annex branch, eg after a crash.
|
committing to the git-annex branch, eg after a crash.
|
||||||
* watcher: Detect at startup time when there is a stale .git/lock,
|
* assistant: Detect stale git lock files at startup time, and remove them.
|
||||||
and remove it so it does not interfere with the automatic
|
|
||||||
commits of changed files.
|
|
||||||
* addurl: Better sanitization of generated filenames.
|
* addurl: Better sanitization of generated filenames.
|
||||||
* Better sanitization of problem characters when generating URL and WORM
|
* Better sanitization of problem characters when generating URL and WORM
|
||||||
keys.
|
keys.
|
||||||
|
|
|
@ -39,3 +39,6 @@ fatal: Unable to create '/mnt/sdcard/reference/.git/index.lock': File exists.
|
||||||
> The '/mnt/sdcard/reference/.git/index.lock' lock file will now be
|
> The '/mnt/sdcard/reference/.git/index.lock' lock file will now be
|
||||||
> automatically dealt with. Have not done anything about the refs/remotes
|
> automatically dealt with. Have not done anything about the refs/remotes
|
||||||
> lock files yet. --[[Joey]]
|
> lock files yet. --[[Joey]]
|
||||||
|
>
|
||||||
|
> Now the assistant deals with all stale git lock files on startup.
|
||||||
|
> [[done]] --[[Joey]]
|
||||||
|
|
Loading…
Reference in a new issue