git-annex/Assistant/Threads/Watcher.hs

252 lines
8.2 KiB
Haskell
Raw Normal View History

2012-06-13 16:36:33 +00:00
{- git-annex assistant tree watcher
-
- Copyright 2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Assistant.Threads.Watcher (
watchThread,
checkCanWatch,
needLsof,
stageSymlink,
onAddSymlink,
runHandler,
) where
import Assistant.Common
2012-06-13 16:36:33 +00:00
import Assistant.ThreadedMonad
import Assistant.DaemonStatus
2012-06-19 06:40:21 +00:00
import Assistant.Changes
import Assistant.TransferQueue
import Assistant.Alert
import Logs.Transfer
import Utility.DirWatcher
import Utility.Types.DirWatcher
import qualified Annex
2012-06-13 16:36:33 +00:00
import qualified Annex.Queue
import qualified Git.Command
import qualified Git.UpdateIndex
import qualified Git.HashObject
import qualified Git.LsFiles
import qualified Backend
import qualified Command.Add
2012-06-13 16:36:33 +00:00
import Annex.Content
import Annex.CatFile
import Git.Types
import Data.Bits.Utils
import qualified Data.ByteString.Lazy as L
thisThread :: ThreadName
thisThread = "Watcher"
checkCanWatch :: Annex ()
checkCanWatch
| canWatch =
unlessM (liftIO (inPath "lsof") <||> Annex.getState Annex.force) $
needLsof
| otherwise = error "watch mode is not available on this system"
needLsof :: Annex ()
needLsof = error $ unlines
[ "The lsof command is needed for watch mode to be safe, and is not in PATH."
, "To override lsof checks to ensure that files are not open for writing"
, "when added to the annex, you can use --force"
, "Be warned: This can corrupt data in the annex, and make fsck complain."
]
watchThread :: ThreadState -> DaemonStatusHandle -> TransferQueue -> ChangeChan -> IO ()
watchThread st dstatus transferqueue changechan = do
void $ watchDir "." ignored hooks startup
debug thisThread [ "watching", "."]
2012-06-13 16:36:33 +00:00
where
startup = startupScan st dstatus
hook a = Just $ runHandler thisThread st dstatus transferqueue changechan a
2012-06-13 16:36:33 +00:00
hooks = WatchHooks
{ addHook = hook onAdd
, delHook = hook onDel
, addSymlinkHook = hook onAddSymlink
, delDirHook = hook onDelDir
, errHook = hook onErr
}
2012-06-19 02:13:39 +00:00
{- Initial scartup scan. The action should return once the scan is complete. -}
2012-07-30 20:32:32 +00:00
startupScan :: ThreadState -> DaemonStatusHandle -> IO a -> IO a
startupScan st dstatus scanner = do
2012-07-29 17:22:08 +00:00
runThreadState st $ showAction "scanning"
2012-07-30 20:32:32 +00:00
r <- alertWhile' dstatus startupScanAlert $ do
r <- scanner
2012-06-19 02:13:39 +00:00
2012-07-29 23:05:51 +00:00
-- Notice any files that were deleted before
-- watching was started.
runThreadState st $ do
inRepo $ Git.Command.run "add" [Param "--update"]
showAction "started"
modifyDaemonStatus_ dstatus $ \s -> s { scanComplete = True }
2012-07-30 20:32:32 +00:00
return (True, r)
2012-06-19 02:13:39 +00:00
2012-07-30 20:32:32 +00:00
return r
2012-06-13 16:36:33 +00:00
ignored :: FilePath -> Bool
ignored = ig . takeFileName
where
ig ".git" = True
ig ".gitignore" = True
ig ".gitattributes" = True
ig _ = False
type Handler = ThreadName -> FilePath -> Maybe FileStatus -> DaemonStatusHandle -> TransferQueue -> Annex (Maybe Change)
2012-06-13 16:36:33 +00:00
{- Runs an action handler, inside the Annex monad, and if there was a
- change, adds it to the ChangeChan.
-
- Exceptions are ignored, otherwise a whole watcher thread could be crashed.
-}
runHandler :: ThreadName -> ThreadState -> DaemonStatusHandle -> TransferQueue -> ChangeChan -> Handler -> FilePath -> Maybe FileStatus -> IO ()
runHandler threadname st dstatus transferqueue changechan handler file filestatus = void $ do
2012-06-13 16:36:33 +00:00
r <- tryIO go
case r of
Left e -> print e
Right Nothing -> noop
2012-06-22 17:39:44 +00:00
Right (Just change) -> recordChange changechan change
2012-06-13 16:36:33 +00:00
where
go = runThreadState st $ handler threadname file filestatus dstatus transferqueue
2012-06-13 16:36:33 +00:00
{- During initial directory scan, this will be run for any regular files
- that are already checked into git. We don't want to turn those into
- symlinks, so do a check. This is rather expensive, but only happens
- during startup.
2012-06-13 16:36:33 +00:00
-
- It's possible for the file to still be open for write by some process.
- This can happen in a few ways; one is if two processes had the file open
- and only one has just closed it. We want to avoid adding a file to the
- annex that is open for write, to avoid anything being able to change it.
2012-06-13 16:36:33 +00:00
-
- We could run lsof on the file here to check for other writers.
- But, that's slow, and even if there is currently a writer, we will want
- to add the file *eventually*. Instead, the file is locked down as a hard
- link in a temp directory, with its write bits disabled, for later
- checking with lsof, and a Change is returned containing a KeySource
- using that hard link. The committer handles running lsof and finishing
- the add.
2012-06-13 16:36:33 +00:00
-}
onAdd :: Handler
onAdd threadname file filestatus dstatus _
| maybe False isRegularFile filestatus = do
ifM (scanComplete <$> liftIO (getDaemonStatus dstatus))
( go
, ifM (null <$> inRepo (Git.LsFiles.notInRepo False [file]))
( noChange
, go
)
2012-06-13 16:36:33 +00:00
)
| otherwise = noChange
2012-06-13 16:36:33 +00:00
where
go = do
liftIO $ debug threadname ["file added", file]
pendingAddChange =<< Command.Add.lockDown file
2012-06-13 16:36:33 +00:00
{- A symlink might be an arbitrary symlink, which is just added.
- Or, if it is a git-annex symlink, ensure it points to the content
- before adding it.
-}
onAddSymlink :: Handler
onAddSymlink threadname file filestatus dstatus transferqueue = go =<< Backend.lookupFile file
2012-06-13 16:36:33 +00:00
where
go (Just (key, _)) = do
link <- calcGitLink file key
ifM ((==) link <$> liftIO (readSymbolicLink file))
( do
s <- liftIO $ getDaemonStatus dstatus
checkcontent key s
ensurestaged link s
2012-06-13 16:36:33 +00:00
, do
liftIO $ debug threadname ["fix symlink", file]
2012-06-13 16:36:33 +00:00
liftIO $ removeFile file
liftIO $ createSymbolicLink link file
addlink link
)
go Nothing = do -- other symlink
link <- liftIO (readSymbolicLink file)
ensurestaged link =<< liftIO (getDaemonStatus dstatus)
2012-06-13 16:36:33 +00:00
{- This is often called on symlinks that are already
- staged correctly. A symlink may have been deleted
- and being re-added, or added when the watcher was
- not running. So they're normally restaged to make sure.
-
- As an optimisation, during the status scan, avoid
- restaging everything. Only links that were created since
- the last time the daemon was running are staged.
- (If the daemon has never ran before, avoid staging
- links too.)
-}
ensurestaged link daemonstatus
| scanComplete daemonstatus = addlink link
| otherwise = case filestatus of
Just s
| not (afterLastDaemonRun (statusChangeTime s) daemonstatus) -> noChange
2012-06-13 16:36:33 +00:00
_ -> addlink link
{- For speed, tries to reuse the existing blob for
- the symlink target. -}
addlink link = do
liftIO $ debug threadname ["add symlink", file]
2012-06-13 16:36:33 +00:00
v <- catObjectDetails $ Ref $ ':':file
case v of
Just (currlink, sha)
| s2w8 link == L.unpack currlink ->
stageSymlink file sha
_ -> do
sha <- inRepo $
Git.HashObject.hashObject BlobObject link
stageSymlink file sha
madeChange file LinkChange
2012-06-13 16:36:33 +00:00
{- When a new link appears, after the startup scan,
- try to get the key's content. -}
checkcontent key daemonstatus
| scanComplete daemonstatus = unlessM (inAnnex key) $
queueTransfers Next transferqueue dstatus
key (Just file) Download
| otherwise = noop
2012-06-13 16:36:33 +00:00
onDel :: Handler
onDel threadname file _ _dstatus _ = do
liftIO $ debug threadname ["file deleted", file]
2012-06-13 16:36:33 +00:00
Annex.Queue.addUpdateIndex =<<
inRepo (Git.UpdateIndex.unstageFile file)
madeChange file RmChange
2012-06-13 16:36:33 +00:00
{- A directory has been deleted, or moved, so tell git to remove anything
- that was inside it from its cache. Since it could reappear at any time,
- use --cached to only delete it from the index.
-
- Note: This could use unstageFile, but would need to run another git
- command to get the recursive list of files in the directory, so rm is
- just as good. -}
onDelDir :: Handler
onDelDir threadname dir _ _dstatus _ = do
liftIO $ debug threadname ["directory deleted", dir]
2012-06-13 16:36:33 +00:00
Annex.Queue.addCommand "rm"
[Params "--quiet -r --cached --ignore-unmatch --"] [dir]
madeChange dir RmDirChange
2012-06-13 16:36:33 +00:00
{- Called when there's an error with inotify. -}
onErr :: Handler
onErr _ msg _ _dstatus _ = do
2012-06-13 16:36:33 +00:00
warning msg
return Nothing
{- Adds a symlink to the index, without ever accessing the actual symlink
- on disk. This avoids a race if git add is used, where the symlink is
- changed to something else immediately after creation.
-}
2012-06-13 16:36:33 +00:00
stageSymlink :: FilePath -> Sha -> Annex ()
stageSymlink file sha =
Annex.Queue.addUpdateIndex =<<
inRepo (Git.UpdateIndex.stageSymlink file sha)