better optimisation of add check

Now really only done in the startup scan.

It turns out to be quite hard for event handlers to know when the startup
scan is complete. I tried to make addWatch pass that info, but found
threading the state very difficult. For now, a quick hack, using the fast
flag.

Note that it's actually possible for inotify events to come in while the
startup scan is still ongoing. Due to my hack, the expensive check will
be done for files added in such inotify events.
This commit is contained in:
Joey Hess 2012-06-12 16:20:56 -04:00
parent 7d2c813396
commit b240418acc
2 changed files with 44 additions and 38 deletions

View file

@ -68,7 +68,7 @@ import System.INotify
type ChangeChan = TChan Change
type Handler = FilePath -> Bool -> Annex (Maybe Change)
type Handler = FilePath -> Annex (Maybe Change)
data Change = Change
{ changeTime :: UTCTime
@ -122,18 +122,25 @@ watch st = withINotify $ \i -> do
}
-- The commit thread is started early, so that the user
-- can immediately begin adding files and having them
-- committed, even while the inotify scan is taking place.
-- committed, even while the startup scan is taking place.
_ <- forkIO $ commitThread st changechan
-- This does not return until the inotify scan is done.
-- The fast flag is abused somewhat, to tell when the startup
-- scan is still running.
runStateMVar st $ do
setfast False
showAction "scanning"
-- This does not return until the startup scan is done.
-- That can take some time for large trees.
watchDir i "." (ignored . takeFileName) hooks
runStateMVar st $ showAction "scanning"
runStateMVar st $ setfast True
-- Notice any files that were deleted before inotify
-- was started.
runStateMVar st $ do
inRepo $ Git.Command.run "add" [Param "--update"]
showAction "started"
waitForTermination
where
setfast v= Annex.changeState $ \s -> s { Annex.fast = v }
#else
watch = error "watch mode is so far only available on Linux"
#endif
@ -174,9 +181,9 @@ runChangeChan = atomically
-
- Exceptions are ignored, otherwise a whole watcher thread could be crashed.
-}
runHandler :: MVar Annex.AnnexState -> ChangeChan -> Handler -> FilePath -> Bool -> IO ()
runHandler st changechan handler file inscan = void $ do
r <- tryIO (runStateMVar st $ handler file inscan)
runHandler :: MVar Annex.AnnexState -> ChangeChan -> Handler -> FilePath -> IO ()
runHandler st changechan handler file = void $ do
r <- tryIO (runStateMVar st $ handler file)
case r of
Left e -> print e
Right Nothing -> noop
@ -200,34 +207,38 @@ noChange = return Nothing
-
- Inotify will notice the new symlink, so this Handler does not stage it
- or return a Change, leaving that to onAddSymlink.
-
- During initial directory scan, this will be run for any files that
- are already checked into git. We don't want to turn those into symlinks,
- so do a check. This is rather expensive, but only happens during
- startup.
-}
onAdd :: Handler
onAdd file False = do
showStart "add" file
handle =<< Command.Add.ingest file
noChange
onAdd file = do
ifM (Annex.getState Annex.fast)
( go -- initial directory scan is complete
, do -- expensive check done only during startup scan
ifM (null <$> inRepo (Git.LsFiles.notInRepo False [file]))
( noChange
, go
)
)
where
go = do
showStart "add" file
handle =<< Command.Add.ingest file
noChange
handle Nothing = showEndFail
handle (Just key) = do
Command.Add.link file key True
showEndOk
{- During initial directory scan, this will be run for any files that
- are already checked into git. We don't want to turn those into symlinks,
- so do a check. This is rather expensive, but only happens during
- startup, and when a directory is moved into the tree. -}
onAdd file True = do
liftIO $ putStrLn $ "expensive check for " ++ file
ifM (null <$> inRepo (Git.LsFiles.notInRepo False [file]))
( noChange
, onAdd file False
)
{- A symlink might be an arbitrary symlink, which is just added.
- Or, if it is a git-annex symlink, ensure it points to the content
- before adding it.
-}
onAddSymlink :: Handler
onAddSymlink file _inscan = go =<< Backend.lookupFile file
onAddSymlink file = go =<< Backend.lookupFile file
where
go Nothing = addlink =<< liftIO (readSymbolicLink file)
go (Just (key, _)) = do
@ -260,7 +271,7 @@ onAddSymlink file _inscan = go =<< Backend.lookupFile file
madeChange file "link"
onDel :: Handler
onDel file _inscan = do
onDel file = do
Annex.Queue.addUpdateIndex =<<
inRepo (Git.UpdateIndex.unstageFile file)
madeChange file "rm"
@ -273,14 +284,14 @@ onDel file _inscan = do
- command to get the recursive list of files in the directory, so rm is
- just as good. -}
onDelDir :: Handler
onDelDir dir _inscan = do
onDelDir dir = do
Annex.Queue.addCommand "rm"
[Params "--quiet -r --cached --ignore-unmatch --"] [dir]
madeChange dir "rmdir"
{- Called when there's an error with inotify. -}
onErr :: Handler
onErr msg _inscan = do
onErr msg = do
warning msg
return Nothing

View file

@ -15,11 +15,7 @@ import qualified System.Posix.Files as Files
import System.IO.Error
import Control.Exception (throw)
{- A hook is passed some value to act on.
-
- The Bool is False when we're in the intial scan of a directory tree,
- rather than having received a genuine inotify event. -}
type Hook a = Maybe (a -> Bool -> IO ())
type Hook a = Maybe (a -> IO ())
data WatchHooks = WatchHooks
{ addHook :: Hook FilePath
@ -94,8 +90,8 @@ watchDir i dir ignored hooks
Nothing -> return ()
Just s
| Files.isDirectory s -> recurse fullf
| Files.isSymbolicLink s -> addSymlinkHook <@?> f
| Files.isRegularFile s -> addHook <@?> f
| Files.isSymbolicLink s -> addSymlinkHook <@> f
| Files.isRegularFile s -> addHook <@> f
| otherwise -> return ()
-- Ignore creation events for regular files, which won't be
@ -130,11 +126,10 @@ watchDir i dir ignored hooks
hashook h = isJust $ h hooks
runhook h f inscan
runhook h f
| ignored f = noop
| otherwise = maybe noop (\a -> a (indir f) inscan) (h hooks)
h <@> f = runhook h f False
h <@?> f = runhook h f True
| otherwise = maybe noop (\a -> a $ indir f) (h hooks)
h <@> f = runhook h f
indir f = dir </> f
@ -149,10 +144,10 @@ watchDir i dir ignored hooks
Just hook -> tooManyWatches hook dir
| otherwise = throw e
tooManyWatches :: (String -> Bool -> IO ()) -> FilePath -> IO ()
tooManyWatches :: (String -> IO ()) -> FilePath -> IO ()
tooManyWatches hook dir = do
sysctlval <- querySysctl [Param maxwatches] :: IO (Maybe Integer)
hook (unlines $ basewarning : maybe withoutsysctl withsysctl sysctlval) False
hook $ unlines $ basewarning : maybe withoutsysctl withsysctl sysctlval
where
maxwatches = "fs.inotify.max_user_watches"
basewarning = "Too many directories to watch! (Not watching " ++ dir ++")"