optimise link staging at startup
Now it starts really, really fast! Down from 15 minutes or so on my big tree to around 1 minute. The trick is to remember the last time the daemon was running. Links with a ctime from before that point don't need to be restaged on startup (as long as they are correct), since the old daemon would have handled them already. We also assume that if the daemon has never run before, any links that already exist are good. The pre-commit hook fixes links, so this should be a safe assumption. Adds another MVar holding a DaemonStatus data structure. Also allowed getting rid of the Annex.Fast hack. This data structure will probably grow a lot of details about the daemon's status, that will later be used by the webapp's UI. The code to actually track when the daemon was last running is not written yet. It's 3 am.
This commit is contained in:
parent
12dbb9d1d0
commit
c31ddeda84
1 changed files with 97 additions and 47 deletions
144
Command/Watch.hs
144
Command/Watch.hs
|
@ -24,12 +24,18 @@
|
||||||
- Thread 5: committer
|
- Thread 5: committer
|
||||||
- Waits for changes to occur, and runs the git queue to update its
|
- Waits for changes to occur, and runs the git queue to update its
|
||||||
- index, then commits.
|
- index, then commits.
|
||||||
|
- Thread 6: status logger
|
||||||
|
- Wakes up periodically and records the daemon's status to disk.
|
||||||
-
|
-
|
||||||
- State MVar:
|
- State MVar:
|
||||||
- The Annex state is stored here, which allows resuscitating the
|
- The Annex state is stored here, which allows resuscitating the
|
||||||
- Annex monad in IO actions run by the inotify and committer
|
- Annex monad in IO actions run by the inotify and committer
|
||||||
- threads. Thus, a single state is shared amoung the threads, and
|
- threads. Thus, a single state is shared amoung the threads, and
|
||||||
- only one at a time can access it.
|
- only one at a time can access it.
|
||||||
|
- DaemonStatus MVar:
|
||||||
|
- The daemon's current status. This MVar should only be manipulated
|
||||||
|
- from inside the Annex monad, which ensures it's accessed only
|
||||||
|
- after the State MVar.
|
||||||
- ChangeChan STM TChan:
|
- ChangeChan STM TChan:
|
||||||
- Changes are indicated by writing to this channel. The committer
|
- Changes are indicated by writing to this channel. The committer
|
||||||
- reads from it.
|
- reads from it.
|
||||||
|
@ -59,6 +65,7 @@ import Control.Concurrent
|
||||||
import Control.Concurrent.STM
|
import Control.Concurrent.STM
|
||||||
import Data.Time.Clock
|
import Data.Time.Clock
|
||||||
import Data.Bits.Utils
|
import Data.Bits.Utils
|
||||||
|
import System.Posix.Types
|
||||||
import qualified Data.ByteString.Lazy as L
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
|
||||||
#if defined linux_HOST_OS
|
#if defined linux_HOST_OS
|
||||||
|
@ -66,9 +73,28 @@ import Utility.Inotify
|
||||||
import System.INotify
|
import System.INotify
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
data DaemonStatus = DaemonStatus
|
||||||
|
-- False when the daemon is performing its startup scan
|
||||||
|
{ scanComplete :: Bool
|
||||||
|
-- Time when a previous process of the daemon was running ok
|
||||||
|
, lastRunning :: Maybe EpochTime
|
||||||
|
}
|
||||||
|
|
||||||
|
newDaemonStatus :: Annex DaemonStatus
|
||||||
|
newDaemonStatus = return $ DaemonStatus
|
||||||
|
{ scanComplete = False
|
||||||
|
, lastRunning = Nothing
|
||||||
|
}
|
||||||
|
|
||||||
|
getDaemonStatus :: MVar DaemonStatus -> Annex DaemonStatus
|
||||||
|
getDaemonStatus = liftIO . readMVar
|
||||||
|
|
||||||
|
modifyDaemonStatus :: MVar DaemonStatus -> (DaemonStatus -> DaemonStatus) -> Annex ()
|
||||||
|
modifyDaemonStatus status a = liftIO $ modifyMVar_ status (return . a)
|
||||||
|
|
||||||
type ChangeChan = TChan Change
|
type ChangeChan = TChan Change
|
||||||
|
|
||||||
type Handler = FilePath -> Maybe FileStatus -> Annex (Maybe Change)
|
type Handler = FilePath -> Maybe FileStatus -> MVar DaemonStatus -> Annex (Maybe Change)
|
||||||
|
|
||||||
data Change = Change
|
data Change = Change
|
||||||
{ changeTime :: UTCTime
|
{ changeTime :: UTCTime
|
||||||
|
@ -96,43 +122,40 @@ start :: Bool -> Bool -> CommandStart
|
||||||
start foreground stopdaemon = notBareRepo $ do
|
start foreground stopdaemon = notBareRepo $ do
|
||||||
if stopdaemon
|
if stopdaemon
|
||||||
then liftIO . stopDaemon =<< fromRepo gitAnnexPidFile
|
then liftIO . stopDaemon =<< fromRepo gitAnnexPidFile
|
||||||
else withStateMVar $ startDaemon (not foreground)
|
else withStateMVar $ startDaemon foreground
|
||||||
stop
|
stop
|
||||||
|
|
||||||
startDaemon :: Bool -> MVar Annex.AnnexState -> Annex ()
|
startDaemon :: Bool -> MVar Annex.AnnexState -> Annex ()
|
||||||
startDaemon False st = do
|
startDaemon foreground st
|
||||||
showStart "watch" "."
|
| foreground = do
|
||||||
liftIO $ watch st
|
showStart "watch" "."
|
||||||
startDaemon True st = do
|
go id
|
||||||
logfd <- liftIO . openLog =<< fromRepo gitAnnexLogFile
|
| otherwise = do
|
||||||
pidfile <- fromRepo gitAnnexPidFile
|
logfd <- liftIO . openLog =<< fromRepo gitAnnexLogFile
|
||||||
liftIO $ daemonize logfd (Just pidfile) False $ watch st
|
pidfile <- fromRepo gitAnnexPidFile
|
||||||
|
go $ daemonize logfd (Just pidfile) False
|
||||||
|
where
|
||||||
|
go a = do
|
||||||
|
daemonstatus <- newDaemonStatus
|
||||||
|
liftIO $ a $ do
|
||||||
|
dstatus <- newMVar daemonstatus
|
||||||
|
changechan <- runChangeChan newTChan
|
||||||
|
watch st dstatus changechan
|
||||||
|
|
||||||
watch :: MVar Annex.AnnexState -> IO ()
|
watch :: MVar Annex.AnnexState -> MVar DaemonStatus -> ChangeChan -> IO ()
|
||||||
#if defined linux_HOST_OS
|
#if defined linux_HOST_OS
|
||||||
watch st = withINotify $ \i -> do
|
watch st dstatus changechan = withINotify $ \i -> do
|
||||||
changechan <- runChangeChan newTChan
|
|
||||||
let hook a = Just $ runHandler st changechan a
|
|
||||||
let hooks = WatchHooks
|
|
||||||
{ addHook = hook onAdd
|
|
||||||
, delHook = hook onDel
|
|
||||||
, addSymlinkHook = hook onAddSymlink
|
|
||||||
, delDirHook = hook onDelDir
|
|
||||||
, errHook = hook onErr
|
|
||||||
}
|
|
||||||
-- The commit thread is started early, so that the user
|
-- The commit thread is started early, so that the user
|
||||||
-- can immediately begin adding files and having them
|
-- can immediately begin adding files and having them
|
||||||
-- committed, even while the startup scan is taking place.
|
-- committed, even while the startup scan is taking place.
|
||||||
_ <- forkIO $ commitThread st changechan
|
_ <- forkIO $ commitThread st changechan
|
||||||
-- The fast flag is abused somewhat, to tell when the startup
|
runStateMVar st $
|
||||||
-- scan is still running.
|
|
||||||
runStateMVar st $ do
|
|
||||||
setfast False
|
|
||||||
showAction "scanning"
|
showAction "scanning"
|
||||||
-- This does not return until the startup scan is done.
|
-- This does not return until the startup scan is done.
|
||||||
-- That can take some time for large trees.
|
-- That can take some time for large trees.
|
||||||
watchDir i "." (ignored . takeFileName) hooks
|
watchDir i "." (ignored . takeFileName) hooks
|
||||||
runStateMVar st $ setfast True
|
runStateMVar st $
|
||||||
|
modifyDaemonStatus dstatus $ \s -> s { scanComplete = True }
|
||||||
-- Notice any files that were deleted before inotify
|
-- Notice any files that were deleted before inotify
|
||||||
-- was started.
|
-- was started.
|
||||||
runStateMVar st $ do
|
runStateMVar st $ do
|
||||||
|
@ -140,7 +163,14 @@ watch st = withINotify $ \i -> do
|
||||||
showAction "started"
|
showAction "started"
|
||||||
waitForTermination
|
waitForTermination
|
||||||
where
|
where
|
||||||
setfast v= Annex.changeState $ \s -> s { Annex.fast = v }
|
hook a = Just $ runHandler st dstatus changechan a
|
||||||
|
hooks = WatchHooks
|
||||||
|
{ addHook = hook onAdd
|
||||||
|
, delHook = hook onDel
|
||||||
|
, addSymlinkHook = hook onAddSymlink
|
||||||
|
, delDirHook = hook onDelDir
|
||||||
|
, errHook = hook onErr
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
watch = error "watch mode is so far only available on Linux"
|
watch = error "watch mode is so far only available on Linux"
|
||||||
#endif
|
#endif
|
||||||
|
@ -181,14 +211,16 @@ runChangeChan = atomically
|
||||||
-
|
-
|
||||||
- Exceptions are ignored, otherwise a whole watcher thread could be crashed.
|
- Exceptions are ignored, otherwise a whole watcher thread could be crashed.
|
||||||
-}
|
-}
|
||||||
runHandler :: MVar Annex.AnnexState -> ChangeChan -> Handler -> FilePath -> Maybe FileStatus -> IO ()
|
runHandler :: MVar Annex.AnnexState -> MVar DaemonStatus -> ChangeChan -> Handler -> FilePath -> Maybe FileStatus -> IO ()
|
||||||
runHandler st changechan handler file filestatus = void $ do
|
runHandler st dstatus changechan handler file filestatus = void $ do
|
||||||
r <- tryIO (runStateMVar st $ handler file filestatus)
|
r <- tryIO go
|
||||||
case r of
|
case r of
|
||||||
Left e -> print e
|
Left e -> print e
|
||||||
Right Nothing -> noop
|
Right Nothing -> noop
|
||||||
Right (Just change) -> void $
|
Right (Just change) -> void $
|
||||||
runChangeChan $ writeTChan changechan change
|
runChangeChan $ writeTChan changechan change
|
||||||
|
where
|
||||||
|
go = runStateMVar st $ handler file filestatus dstatus
|
||||||
|
|
||||||
{- Handlers call this when they made a change that needs to get committed. -}
|
{- Handlers call this when they made a change that needs to get committed. -}
|
||||||
madeChange :: FilePath -> String -> Annex (Maybe Change)
|
madeChange :: FilePath -> String -> Annex (Maybe Change)
|
||||||
|
@ -214,14 +246,13 @@ noChange = return Nothing
|
||||||
- startup.
|
- startup.
|
||||||
-}
|
-}
|
||||||
onAdd :: Handler
|
onAdd :: Handler
|
||||||
onAdd file _filestatus = do
|
onAdd file _filestatus dstatus = do
|
||||||
ifM (Annex.getState Annex.fast)
|
ifM (scanComplete <$> getDaemonStatus dstatus)
|
||||||
( go -- initial directory scan is complete
|
( go
|
||||||
, do -- expensive check done only during startup scan
|
, ifM (null <$> inRepo (Git.LsFiles.notInRepo False [file]))
|
||||||
ifM (null <$> inRepo (Git.LsFiles.notInRepo False [file]))
|
( noChange
|
||||||
( noChange
|
, go
|
||||||
, go
|
)
|
||||||
)
|
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
go = do
|
go = do
|
||||||
|
@ -237,24 +268,43 @@ onAdd file _filestatus = do
|
||||||
- Or, if it is a git-annex symlink, ensure it points to the content
|
- Or, if it is a git-annex symlink, ensure it points to the content
|
||||||
- before adding it.
|
- before adding it.
|
||||||
-
|
-
|
||||||
- This is often called on symlinks that are already staged correctly.
|
|
||||||
- A symlink may have been deleted and being re-added, or added when
|
|
||||||
- the watcher was not running; so it always stages even symlinks that
|
|
||||||
- already exist.
|
|
||||||
-}
|
-}
|
||||||
onAddSymlink :: Handler
|
onAddSymlink :: Handler
|
||||||
onAddSymlink file filestatus = go =<< Backend.lookupFile file
|
onAddSymlink file filestatus dstatus = go =<< Backend.lookupFile file
|
||||||
where
|
where
|
||||||
go Nothing = addlink =<< liftIO (readSymbolicLink file)
|
|
||||||
go (Just (key, _)) = do
|
go (Just (key, _)) = do
|
||||||
link <- calcGitLink file key
|
link <- calcGitLink file key
|
||||||
ifM ((==) link <$> liftIO (readSymbolicLink file))
|
ifM ((==) link <$> liftIO (readSymbolicLink file))
|
||||||
( addlink link
|
( ensurestaged link =<< getDaemonStatus dstatus
|
||||||
, do
|
, do
|
||||||
liftIO $ removeFile file
|
liftIO $ removeFile file
|
||||||
liftIO $ createSymbolicLink link file
|
liftIO $ createSymbolicLink link file
|
||||||
addlink link
|
addlink link
|
||||||
)
|
)
|
||||||
|
go Nothing = do -- other symlink
|
||||||
|
link <- liftIO (readSymbolicLink file)
|
||||||
|
ensurestaged link =<< getDaemonStatus dstatus
|
||||||
|
|
||||||
|
{- This is often called on symlinks that are already
|
||||||
|
- staged correctly. A symlink may have been deleted
|
||||||
|
- and being re-added, or added when the watcher was
|
||||||
|
- not running. So they're normally restaged to make sure.
|
||||||
|
-
|
||||||
|
- As an optimisation, during the status scan, avoid
|
||||||
|
- restaging everything. Only links that were created since
|
||||||
|
- the last time the daemon was running are staged.
|
||||||
|
- (If the daemon has never ran before, avoid staging
|
||||||
|
- links too.)
|
||||||
|
-}
|
||||||
|
ensurestaged link daemonstatus
|
||||||
|
| scanComplete daemonstatus = addlink link
|
||||||
|
| otherwise = case filestatus of
|
||||||
|
Just s
|
||||||
|
| safe (statusChangeTime s) -> noChange
|
||||||
|
_ -> addlink link
|
||||||
|
where
|
||||||
|
safe t = maybe True (> t) (lastRunning daemonstatus)
|
||||||
|
|
||||||
{- For speed, tries to reuse the existing blob for
|
{- For speed, tries to reuse the existing blob for
|
||||||
- the symlink target. -}
|
- the symlink target. -}
|
||||||
addlink link = do
|
addlink link = do
|
||||||
|
@ -270,7 +320,7 @@ onAddSymlink file filestatus = go =<< Backend.lookupFile file
|
||||||
madeChange file "link"
|
madeChange file "link"
|
||||||
|
|
||||||
onDel :: Handler
|
onDel :: Handler
|
||||||
onDel file _filestatus = do
|
onDel file _ _dstatus = do
|
||||||
Annex.Queue.addUpdateIndex =<<
|
Annex.Queue.addUpdateIndex =<<
|
||||||
inRepo (Git.UpdateIndex.unstageFile file)
|
inRepo (Git.UpdateIndex.unstageFile file)
|
||||||
madeChange file "rm"
|
madeChange file "rm"
|
||||||
|
@ -283,14 +333,14 @@ onDel file _filestatus = do
|
||||||
- command to get the recursive list of files in the directory, so rm is
|
- command to get the recursive list of files in the directory, so rm is
|
||||||
- just as good. -}
|
- just as good. -}
|
||||||
onDelDir :: Handler
|
onDelDir :: Handler
|
||||||
onDelDir dir _filestatus = do
|
onDelDir dir _ _dstatus = do
|
||||||
Annex.Queue.addCommand "rm"
|
Annex.Queue.addCommand "rm"
|
||||||
[Params "--quiet -r --cached --ignore-unmatch --"] [dir]
|
[Params "--quiet -r --cached --ignore-unmatch --"] [dir]
|
||||||
madeChange dir "rmdir"
|
madeChange dir "rmdir"
|
||||||
|
|
||||||
{- Called when there's an error with inotify. -}
|
{- Called when there's an error with inotify. -}
|
||||||
onErr :: Handler
|
onErr :: Handler
|
||||||
onErr msg _ = do
|
onErr msg _ _dstatus = do
|
||||||
warning msg
|
warning msg
|
||||||
return Nothing
|
return Nothing
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue