git-annex/Assistant/Threads/Watcher.hs

341 lines
10 KiB
Haskell
Raw Normal View History

2012-06-13 16:36:33 +00:00
{- git-annex assistant tree watcher
-
- Copyright 2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
2013-01-28 04:14:17 +00:00
{-# LANGUAGE DeriveDataTypeable, CPP #-}
module Assistant.Threads.Watcher (
watchThread,
WatcherException(..),
checkCanWatch,
needLsof,
onAddSymlink,
runHandler,
) where
import Assistant.Common
2012-06-13 16:36:33 +00:00
import Assistant.DaemonStatus
2012-06-19 06:40:21 +00:00
import Assistant.Changes
2012-10-29 23:30:23 +00:00
import Assistant.Types.Changes
import Assistant.Alert
import Utility.DirWatcher
import Utility.DirWatcher.Types
import Utility.Lsof
import qualified Annex
2012-06-13 16:36:33 +00:00
import qualified Annex.Queue
import qualified Git
2012-06-13 16:36:33 +00:00
import qualified Git.UpdateIndex
import qualified Git.LsFiles as LsFiles
2012-06-13 16:36:33 +00:00
import qualified Backend
import Annex.Direct
import Annex.Content.Direct
2012-06-13 16:36:33 +00:00
import Annex.CatFile
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
import Annex.Link
import Annex.FileMatcher
import Annex.ReplaceFile
2012-06-13 16:36:33 +00:00
import Git.Types
import Config
import Utility.ThreadScheduler
2012-06-13 16:36:33 +00:00
import Data.Bits.Utils
import Data.Typeable
2012-06-13 16:36:33 +00:00
import qualified Data.ByteString.Lazy as L
import qualified Control.Exception as E
import Data.Time.Clock
2012-06-13 16:36:33 +00:00
checkCanWatch :: Annex ()
checkCanWatch
| canWatch = do
liftIO setupLsof
2012-09-13 04:57:52 +00:00
unlessM (liftIO (inPath "lsof") <||> Annex.getState Annex.force)
needLsof
| otherwise = error "watch mode is not available on this system"
needLsof :: Annex ()
needLsof = error $ unlines
[ "The lsof command is needed for watch mode to be safe, and is not in PATH."
, "To override lsof checks to ensure that files are not open for writing"
, "when added to the annex, you can use --force"
, "Be warned: This can corrupt data in the annex, and make fsck complain."
]
{- A special exception that can be thrown to pause or resume the watcher. -}
data WatcherException = PauseWatcher | ResumeWatcher
deriving (Show, Eq, Typeable)
instance E.Exception WatcherException
watchThread :: NamedThread
watchThread = namedThread "Watcher" $
ifM (liftAnnex $ annexAutoCommit <$> Annex.getGitConfig)
( runWatcher
, waitFor ResumeWatcher runWatcher
)
runWatcher :: Assistant ()
runWatcher = do
startup <- asIO1 startupScan
matcher <- liftAnnex $ largeFilesMatcher
direct <- liftAnnex isDirect
symlinkssupported <- liftAnnex $ coreSymlinks <$> Annex.getGitConfig
addhook <- hook $ if direct
then onAddDirect symlinkssupported matcher
else onAdd matcher
delhook <- hook onDel
addsymlinkhook <- hook $ onAddSymlink direct
deldirhook <- hook onDelDir
errhook <- hook onErr
let hooks = mkWatchHooks
{ addHook = addhook
, delHook = delhook
, addSymlinkHook = addsymlinkhook
, delDirHook = deldirhook
, errHook = errhook
}
handle <- liftIO $ watchDir "." ignored hooks startup
debug [ "watching", "."]
{- Let the DirWatcher thread run until signalled to pause it,
- then wait for a resume signal, and restart. -}
waitFor PauseWatcher $ do
liftIO $ stopWatchDir handle
waitFor ResumeWatcher runWatcher
where
hook a = Just <$> asIO2 (runHandler a)
2012-06-13 16:36:33 +00:00
waitFor :: WatcherException -> Assistant () -> Assistant ()
waitFor sig next = do
r <- liftIO $ (E.try pause :: IO (Either E.SomeException ()))
case r of
Left e -> case E.fromException e of
Just s
| s == sig -> next
_ -> noop
_ -> noop
where
pause = runEvery (Seconds 86400) noop
2012-06-19 02:13:39 +00:00
{- Initial scartup scan. The action should return once the scan is complete. -}
startupScan :: IO a -> Assistant a
startupScan scanner = do
liftAnnex $ showAction "scanning"
2012-10-29 20:49:47 +00:00
alertWhile' startupScanAlert $ do
r <- liftIO $ scanner
2012-06-19 02:13:39 +00:00
2012-07-29 23:05:51 +00:00
-- Notice any files that were deleted before
-- watching was started.
top <- liftAnnex $ fromRepo Git.repoPath
(fs, cleanup) <- liftAnnex $ inRepo $ LsFiles.deleted [top]
forM_ fs $ \f -> do
liftAnnex $ onDel' f
maybe noop recordChange =<< madeChange f RmChange
void $ liftIO $ cleanup
liftAnnex $ showAction "started"
liftIO $ putStrLn ""
modifyDaemonStatus_ $ \s -> s { scanComplete = True }
2012-07-30 20:32:32 +00:00
return (True, r)
2012-06-19 02:13:39 +00:00
2012-06-13 16:36:33 +00:00
ignored :: FilePath -> Bool
ignored = ig . takeFileName
where
ig ".git" = True
ig ".gitignore" = True
ig ".gitattributes" = True
2013-01-28 04:13:22 +00:00
#ifdef darwin_HOST_OS
ig ".DS_Store" = True
#endif
ig _ = False
type Handler = FilePath -> Maybe FileStatus -> Assistant (Maybe Change)
2012-06-13 16:36:33 +00:00
{- Runs an action handler, and if there was a change, adds it to the ChangeChan.
2012-06-13 16:36:33 +00:00
-
- Exceptions are ignored, otherwise a whole watcher thread could be crashed.
-}
runHandler :: Handler -> FilePath -> Maybe FileStatus -> Assistant ()
runHandler handler file filestatus = void $ do
r <- tryIO <~> handler (normalize file) filestatus
2012-06-13 16:36:33 +00:00
case r of
Left e -> liftIO $ print e
2012-06-13 16:36:33 +00:00
Right Nothing -> noop
Right (Just change) -> do
-- Just in case the commit thread is not
-- flushing the queue fast enough.
liftAnnex $ Annex.Queue.flushWhenFull
2012-10-29 23:30:23 +00:00
recordChange change
where
normalize f
| "./" `isPrefixOf` file = drop 2 f
| otherwise = f
2012-06-13 16:36:33 +00:00
{- Small files are added to git as-is, while large ones go into the annex. -}
add :: FileMatcher -> FilePath -> Assistant (Maybe Change)
add bigfilematcher file = ifM (liftAnnex $ checkFileMatcher bigfilematcher file)
( pendingAddChange file
, do
liftAnnex $ Annex.Queue.addCommand "add"
[Params "--force --"] [file]
madeChange file AddFileChange
)
onAdd :: FileMatcher -> Handler
onAdd matcher file filestatus
| maybe False isRegularFile filestatus = add matcher file
2012-10-29 23:30:23 +00:00
| otherwise = noChange
2012-06-13 16:36:33 +00:00
{- In direct mode, add events are received for both new files, and
- modified existing files.
-}
onAddDirect :: Bool -> FileMatcher -> Handler
onAddDirect symlinkssupported matcher file fs = do
v <- liftAnnex $ catKeyFile file
case (v, fs) of
(Just key, Just filestatus) ->
2013-02-22 21:01:48 +00:00
ifM (liftAnnex $ sameFileStatus key filestatus)
{- It's possible to get an add event for
- an existing file that is not
- really modified, but it might have
- just been deleted and been put back,
- so it symlink is restaged to make sure. -}
( ifM (scanComplete <$> getDaemonStatus)
( do
link <- liftAnnex $ inRepo $ gitAnnexLink file key
addLink file link (Just key)
, noChange
)
, guardSymlinkStandin (Just key) $ do
debug ["changed direct", file]
liftAnnex $ changedDirect key file
add matcher file
)
_ -> guardSymlinkStandin Nothing $ do
debug ["add direct", file]
add matcher file
where
{- On a filesystem without symlinks, we'll get changes for regular
- files that git uses to stand-in for symlinks. Detect when
- this happens, and stage the symlink, rather than annexing the
- file. -}
guardSymlinkStandin mk a
| symlinkssupported = a
| otherwise = do
linktarget <- liftAnnex $ getAnnexLinkTarget file
case linktarget of
Nothing -> a
Just lt -> do
case fileKey $ takeFileName lt of
Nothing -> noop
Just key -> void $ liftAnnex $
addAssociatedFile key file
onAddSymlink' linktarget mk True file fs
2012-06-13 16:36:33 +00:00
{- A symlink might be an arbitrary symlink, which is just added.
- Or, if it is a git-annex symlink, ensure it points to the content
- before adding it.
-}
onAddSymlink :: Bool -> Handler
onAddSymlink isdirect file filestatus = do
linktarget <- liftIO (catchMaybeIO $ readSymbolicLink file)
kv <- liftAnnex (Backend.lookupFile file)
onAddSymlink' linktarget (fmap fst kv) isdirect file filestatus
onAddSymlink' :: Maybe String -> Maybe Key -> Bool -> Handler
onAddSymlink' linktarget mk isdirect file filestatus = go mk
where
go (Just key) = do
when isdirect $
liftAnnex $ void $ addAssociatedFile key file
link <- liftAnnex $ inRepo $ gitAnnexLink file key
if linktarget == Just link
then ensurestaged (Just link) =<< getDaemonStatus
else do
unless isdirect $
liftAnnex $ replaceFile file $
makeAnnexLink link
addLink file link (Just key)
-- other symlink, not git-annex
go Nothing = ensurestaged linktarget =<< getDaemonStatus
{- This is often called on symlinks that are already
- staged correctly. A symlink may have been deleted
- and being re-added, or added when the watcher was
- not running. So they're normally restaged to make sure.
-
- As an optimisation, during the startup scan, avoid
- restaging everything. Only links that were created since
- the last time the daemon was running are staged.
- (If the daemon has never ran before, avoid staging
- links too.)
-}
ensurestaged (Just link) daemonstatus
| scanComplete daemonstatus = addLink file link mk
| otherwise = case filestatus of
2012-10-29 15:58:29 +00:00
Just s
2012-10-29 23:30:23 +00:00
| not (afterLastDaemonRun (statusChangeTime s) daemonstatus) -> noChange
_ -> addLink file link mk
ensurestaged Nothing _ = noChange
{- For speed, tries to reuse the existing blob for symlink target. -}
addLink :: FilePath -> FilePath -> Maybe Key -> Assistant (Maybe Change)
addLink file link mk = do
debug ["add symlink", file]
liftAnnex $ do
v <- catObjectDetails $ Ref $ ':':file
case v of
Just (currlink, sha)
| s2w8 link == L.unpack currlink ->
stageSymlink file sha
_ -> stageSymlink file =<< hashSymlink link
madeChange file $ LinkChange mk
2012-06-13 16:36:33 +00:00
onDel :: Handler
onDel file _ = do
debug ["file deleted", file]
liftAnnex $ onDel' file
2012-10-29 23:30:23 +00:00
madeChange file RmChange
2012-06-13 16:36:33 +00:00
onDel' :: FilePath -> Annex ()
onDel' file = do
whenM isDirect $ do
mkey <- catKeyFile file
case mkey of
Nothing -> noop
Just key -> void $ removeAssociatedFile key file
Annex.Queue.addUpdateIndex =<<
inRepo (Git.UpdateIndex.unstageFile file)
2012-06-13 16:36:33 +00:00
{- A directory has been deleted, or moved, so tell git to remove anything
- that was inside it from its cache. Since it could reappear at any time,
- use --cached to only delete it from the index.
2012-06-13 16:36:33 +00:00
-
- This queues up a lot of RmChanges, which assists the Committer in
- pairing up renamed files when the directory was renamed. -}
2012-06-13 16:36:33 +00:00
onDelDir :: Handler
onDelDir dir _ = do
debug ["directory deleted", dir]
(fs, clean) <- liftAnnex $ inRepo $ LsFiles.deleted [dir]
liftAnnex $ mapM_ onDel' fs
-- Get the events queued up as fast as possible, so the
-- committer sees them all in one block.
now <- liftIO getCurrentTime
recordChanges $ map (\f -> Change now f RmChange) fs
void $ liftIO $ clean
liftAnnex $ Annex.Queue.flushWhenFull
noChange
2012-06-13 16:36:33 +00:00
{- Called when there's an error with inotify or kqueue. -}
2012-06-13 16:36:33 +00:00
onErr :: Handler
onErr msg _ = do
liftAnnex $ warning msg
void $ addAlert $ warningAlert "watcher" msg
2012-10-29 23:30:23 +00:00
noChange