better optimisation of add check

The expensive add check is now really only done in the startup scan.

It turns out to be quite hard for event handlers to know when the startup
scan is complete. I tried to make addWatch pass that info, but found
threading the state very difficult. For now, a quick hack: the fast flag is
cleared while the startup scan runs, and set once the scan has finished.

Note that it's actually possible for inotify events to come in while the
startup scan is still ongoing. Due to my hack, the expensive check will
be done for files added in such inotify events.
This commit is contained in:
Joey Hess 2012-06-12 16:20:56 -04:00
parent 7d2c813396
commit b240418acc
2 changed files with 44 additions and 38 deletions

View file

@ -68,7 +68,7 @@ import System.INotify
type ChangeChan = TChan Change type ChangeChan = TChan Change
type Handler = FilePath -> Bool -> Annex (Maybe Change) type Handler = FilePath -> Annex (Maybe Change)
data Change = Change data Change = Change
{ changeTime :: UTCTime { changeTime :: UTCTime
@ -122,18 +122,25 @@ watch st = withINotify $ \i -> do
} }
-- The commit thread is started early, so that the user -- The commit thread is started early, so that the user
-- can immediately begin adding files and having them -- can immediately begin adding files and having them
-- committed, even while the inotify scan is taking place. -- committed, even while the startup scan is taking place.
_ <- forkIO $ commitThread st changechan _ <- forkIO $ commitThread st changechan
-- This does not return until the inotify scan is done. -- The fast flag is abused somewhat, to tell when the startup
-- scan is still running.
runStateMVar st $ do
setfast False
showAction "scanning"
-- This does not return until the startup scan is done.
-- That can take some time for large trees. -- That can take some time for large trees.
watchDir i "." (ignored . takeFileName) hooks watchDir i "." (ignored . takeFileName) hooks
runStateMVar st $ showAction "scanning" runStateMVar st $ setfast True
-- Notice any files that were deleted before inotify -- Notice any files that were deleted before inotify
-- was started. -- was started.
runStateMVar st $ do runStateMVar st $ do
inRepo $ Git.Command.run "add" [Param "--update"] inRepo $ Git.Command.run "add" [Param "--update"]
showAction "started" showAction "started"
waitForTermination waitForTermination
where
setfast v= Annex.changeState $ \s -> s { Annex.fast = v }
#else #else
watch = error "watch mode is so far only available on Linux" watch = error "watch mode is so far only available on Linux"
#endif #endif
@ -174,9 +181,9 @@ runChangeChan = atomically
- -
- Exceptions are ignored, otherwise a whole watcher thread could be crashed. - Exceptions are ignored, otherwise a whole watcher thread could be crashed.
-} -}
runHandler :: MVar Annex.AnnexState -> ChangeChan -> Handler -> FilePath -> Bool -> IO () runHandler :: MVar Annex.AnnexState -> ChangeChan -> Handler -> FilePath -> IO ()
runHandler st changechan handler file inscan = void $ do runHandler st changechan handler file = void $ do
r <- tryIO (runStateMVar st $ handler file inscan) r <- tryIO (runStateMVar st $ handler file)
case r of case r of
Left e -> print e Left e -> print e
Right Nothing -> noop Right Nothing -> noop
@ -200,34 +207,38 @@ noChange = return Nothing
- -
- Inotify will notice the new symlink, so this Handler does not stage it - Inotify will notice the new symlink, so this Handler does not stage it
- or return a Change, leaving that to onAddSymlink. - or return a Change, leaving that to onAddSymlink.
-
- During initial directory scan, this will be run for any files that
- are already checked into git. We don't want to turn those into symlinks,
- so do a check. This is rather expensive, but only happens during
- startup.
-} -}
onAdd :: Handler onAdd :: Handler
onAdd file False = do onAdd file = do
showStart "add" file ifM (Annex.getState Annex.fast)
handle =<< Command.Add.ingest file ( go -- initial directory scan is complete
noChange , do -- expensive check done only during startup scan
ifM (null <$> inRepo (Git.LsFiles.notInRepo False [file]))
( noChange
, go
)
)
where where
go = do
showStart "add" file
handle =<< Command.Add.ingest file
noChange
handle Nothing = showEndFail handle Nothing = showEndFail
handle (Just key) = do handle (Just key) = do
Command.Add.link file key True Command.Add.link file key True
showEndOk showEndOk
{- During initial directory scan, this will be run for any files that
- are already checked into git. We don't want to turn those into symlinks,
- so do a check. This is rather expensive, but only happens during
- startup, and when a directory is moved into the tree. -}
onAdd file True = do
liftIO $ putStrLn $ "expensive check for " ++ file
ifM (null <$> inRepo (Git.LsFiles.notInRepo False [file]))
( noChange
, onAdd file False
)
{- A symlink might be an arbitrary symlink, which is just added. {- A symlink might be an arbitrary symlink, which is just added.
- Or, if it is a git-annex symlink, ensure it points to the content - Or, if it is a git-annex symlink, ensure it points to the content
- before adding it. - before adding it.
-} -}
onAddSymlink :: Handler onAddSymlink :: Handler
onAddSymlink file _inscan = go =<< Backend.lookupFile file onAddSymlink file = go =<< Backend.lookupFile file
where where
go Nothing = addlink =<< liftIO (readSymbolicLink file) go Nothing = addlink =<< liftIO (readSymbolicLink file)
go (Just (key, _)) = do go (Just (key, _)) = do
@ -260,7 +271,7 @@ onAddSymlink file _inscan = go =<< Backend.lookupFile file
madeChange file "link" madeChange file "link"
onDel :: Handler onDel :: Handler
onDel file _inscan = do onDel file = do
Annex.Queue.addUpdateIndex =<< Annex.Queue.addUpdateIndex =<<
inRepo (Git.UpdateIndex.unstageFile file) inRepo (Git.UpdateIndex.unstageFile file)
madeChange file "rm" madeChange file "rm"
@ -273,14 +284,14 @@ onDel file _inscan = do
- command to get the recursive list of files in the directory, so rm is - command to get the recursive list of files in the directory, so rm is
- just as good. -} - just as good. -}
onDelDir :: Handler onDelDir :: Handler
onDelDir dir _inscan = do onDelDir dir = do
Annex.Queue.addCommand "rm" Annex.Queue.addCommand "rm"
[Params "--quiet -r --cached --ignore-unmatch --"] [dir] [Params "--quiet -r --cached --ignore-unmatch --"] [dir]
madeChange dir "rmdir" madeChange dir "rmdir"
{- Called when there's an error with inotify. -} {- Called when there's an error with inotify. -}
onErr :: Handler onErr :: Handler
onErr msg _inscan = do onErr msg = do
warning msg warning msg
return Nothing return Nothing

View file

@ -15,11 +15,7 @@ import qualified System.Posix.Files as Files
import System.IO.Error import System.IO.Error
import Control.Exception (throw) import Control.Exception (throw)
{- A hook is passed some value to act on. type Hook a = Maybe (a -> IO ())
-
- The Bool is False when we're in the intial scan of a directory tree,
- rather than having received a genuine inotify event. -}
type Hook a = Maybe (a -> Bool -> IO ())
data WatchHooks = WatchHooks data WatchHooks = WatchHooks
{ addHook :: Hook FilePath { addHook :: Hook FilePath
@ -94,8 +90,8 @@ watchDir i dir ignored hooks
Nothing -> return () Nothing -> return ()
Just s Just s
| Files.isDirectory s -> recurse fullf | Files.isDirectory s -> recurse fullf
| Files.isSymbolicLink s -> addSymlinkHook <@?> f | Files.isSymbolicLink s -> addSymlinkHook <@> f
| Files.isRegularFile s -> addHook <@?> f | Files.isRegularFile s -> addHook <@> f
| otherwise -> return () | otherwise -> return ()
-- Ignore creation events for regular files, which won't be -- Ignore creation events for regular files, which won't be
@ -130,11 +126,10 @@ watchDir i dir ignored hooks
hashook h = isJust $ h hooks hashook h = isJust $ h hooks
runhook h f inscan runhook h f
| ignored f = noop | ignored f = noop
| otherwise = maybe noop (\a -> a (indir f) inscan) (h hooks) | otherwise = maybe noop (\a -> a $ indir f) (h hooks)
h <@> f = runhook h f False h <@> f = runhook h f
h <@?> f = runhook h f True
indir f = dir </> f indir f = dir </> f
@ -149,10 +144,10 @@ watchDir i dir ignored hooks
Just hook -> tooManyWatches hook dir Just hook -> tooManyWatches hook dir
| otherwise = throw e | otherwise = throw e
tooManyWatches :: (String -> Bool -> IO ()) -> FilePath -> IO () tooManyWatches :: (String -> IO ()) -> FilePath -> IO ()
tooManyWatches hook dir = do tooManyWatches hook dir = do
sysctlval <- querySysctl [Param maxwatches] :: IO (Maybe Integer) sysctlval <- querySysctl [Param maxwatches] :: IO (Maybe Integer)
hook (unlines $ basewarning : maybe withoutsysctl withsysctl sysctlval) False hook $ unlines $ basewarning : maybe withoutsysctl withsysctl sysctlval
where where
maxwatches = "fs.inotify.max_user_watches" maxwatches = "fs.inotify.max_user_watches"
basewarning = "Too many directories to watch! (Not watching " ++ dir ++")" basewarning = "Too many directories to watch! (Not watching " ++ dir ++")"