From 1d41ae5d2a25bd778be1c104ad53051a301ddc8e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 5 Jun 2020 11:03:21 -0400 Subject: [PATCH] init warning on stalled lock probe init: If lock probing stalls for a long time (eg a broken NFS server), display a message to let the user know what's taking so long. --- Annex/Init.hs | 34 ++++++++++++------- CHANGELOG | 2 ++ ..._9c77d8cf628025308c2561a8a46d2365._comment | 20 +++++++++++ ..._0d7eb254ec9ff7ba8b1831898008747f._comment | 16 +++++++++ 4 files changed, 59 insertions(+), 13 deletions(-) create mode 100644 doc/todo/faster___40__took_2_min__41___way_to_realize_the_need_in_pidlock__63__/comment_1_9c77d8cf628025308c2561a8a46d2365._comment create mode 100644 doc/todo/faster___40__took_2_min__41___way_to_realize_the_need_in_pidlock__63__/comment_2_0d7eb254ec9ff7ba8b1831898008747f._comment diff --git a/Annex/Init.hs b/Annex/Init.hs index 29adf57340..6ab6daffde 100644 --- a/Annex/Init.hs +++ b/Annex/Init.hs @@ -48,6 +48,7 @@ import Annex.InodeSentinal import Upgrade import Annex.Tmp import Utility.UserInfo +import Utility.ThreadScheduler #ifndef mingw32_HOST_OS import Annex.Perms import Utility.FileMode @@ -57,6 +58,7 @@ import Data.Either #endif import qualified Data.Map as M +import Control.Concurrent.Async checkCanInitialize :: Annex a -> Annex a checkCanInitialize a = inRepo (noAnnexFileContent . fmap fromRawFilePath . 
Git.repoWorkTree) >>= \case @@ -218,21 +220,27 @@ checkCrippledFileSystem = whenM probeCrippledFileSystem $ do (Git.Config.boolConfig False) probeLockSupport :: Annex Bool -probeLockSupport = do #ifdef mingw32_HOST_OS - return True +probeLockSupport = return True #else - withEventuallyCleanedOtherTmp $ \tmp -> do - let f = tmp </> "lockprobe" - mode <- annexFileMode - liftIO $ do - nukeFile f - let locktest = - Posix.lockExclusive (Just mode) f - >>= Posix.dropLock - ok <- isRight <$> tryNonAsync locktest - nukeFile f - return ok +probeLockSupport = withEventuallyCleanedOtherTmp $ \tmp -> do + let f = tmp </> "lockprobe" + mode <- annexFileMode + liftIO $ withAsync warnstall (const (go f mode)) + where + go f mode = do + nukeFile f + let locktest = + Posix.lockExclusive (Just mode) f + >>= Posix.dropLock + ok <- isRight <$> tryNonAsync locktest + nukeFile f + return ok + + warnstall = do + threadDelaySeconds (Seconds 10) + warningIO "Probing the filesystem for POSIX fcntl lock support is taking a long time." + warningIO "(Setting annex.pidlock will avoid this probe.)" #endif probeFifoSupport :: Annex Bool diff --git a/CHANGELOG b/CHANGELOG index 0c5520c71a..7365ca32e1 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -22,6 +22,8 @@ git-annex (8.20200523) UNRELEASED; urgency=medium * export: Let concurrent transfers be done with -J or annex.jobs. * move --to, copy --to, mirror --to: When concurrency is enabled, run cleanup actions in separate job pool from uploads. + * init: If lock probing stalls for a long time (eg a broken NFS server), + display a message to let the user know what's taking so long. 
-- Joey Hess Tue, 26 May 2020 10:20:52 -0400 diff --git a/doc/todo/faster___40__took_2_min__41___way_to_realize_the_need_in_pidlock__63__/comment_1_9c77d8cf628025308c2561a8a46d2365._comment b/doc/todo/faster___40__took_2_min__41___way_to_realize_the_need_in_pidlock__63__/comment_1_9c77d8cf628025308c2561a8a46d2365._comment new file mode 100644 index 0000000000..857c471709 --- /dev/null +++ b/doc/todo/faster___40__took_2_min__41___way_to_realize_the_need_in_pidlock__63__/comment_1_9c77d8cf628025308c2561a8a46d2365._comment @@ -0,0 +1,20 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-06-05T14:43:13Z" + content=""" +So your NFS filesystem is making a POSIX lock operation block for 2 minutes +before, I suppose, it makes it fail. + +Since git-annex has to do a POSIX lock operation, the only way to make it +faster would be to fix the NFS filesystem to not do that. Perhaps by disabling +POSIX locking altogether since it's apparently broken. + +But as far as what git-annex can do, I don't see any possible way to speed it up. + +I agree it could make sense to display a message, although that will also +be a message the vast majority of users who are not in this situation would +see. Maybe it could wait 1-10 seconds (probably 1000 times as long as it +will take in most situations) and if the probe is still ongoing, display a +message. 
+"""]] diff --git a/doc/todo/faster___40__took_2_min__41___way_to_realize_the_need_in_pidlock__63__/comment_2_0d7eb254ec9ff7ba8b1831898008747f._comment b/doc/todo/faster___40__took_2_min__41___way_to_realize_the_need_in_pidlock__63__/comment_2_0d7eb254ec9ff7ba8b1831898008747f._comment new file mode 100644 index 0000000000..bc80eaa103 --- /dev/null +++ b/doc/todo/faster___40__took_2_min__41___way_to_realize_the_need_in_pidlock__63__/comment_2_0d7eb254ec9ff7ba8b1831898008747f._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2020-06-05T14:54:31Z" + content=""" +Also, it could check if annex.pidlock is already set globally, and skip the +probe. Which maybe would work for you? + +I'm going to implement both the delayed message, and checking +annex.pidlock. + +(Timing out the probe after some period of time less than 2 minutes would +also be a possibility, but then there could be false positives on +filesystems that are just legitimately very slow. Doesn't seem a good +idea.) +"""]]