{- git-annex branch state management
 -
 - Runtime state about the git-annex branch, and a small cache.
 -
 - Copyright 2011-2020 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

module Annex.BranchState where

import Annex.Common
import Types.BranchState
import qualified Annex
import Logs

import qualified Data.ByteString.Lazy as L
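
{- The BranchState record itself is defined in Types.BranchState; this
 - module only reads and updates a few of its fields. A rough sketch of
 - those fields, inferred from the code below rather than copied from that
 - module:
 -
 - > data BranchState = BranchState
 - > 	{ indexChecked :: Bool
 - > 	, branchUpdated :: Bool
 - > 	, journalIgnorable :: Bool
 - > 	, needInteractiveAccess :: Bool
 - > 	, cachedFileContents :: [(RawFilePath, L.ByteString)]
 - > 	}
 -}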

getState :: Annex BranchState
getState = Annex.getState Annex.branchstate

changeState :: (BranchState -> BranchState) -> Annex ()
changeState changer = Annex.changeState $ \s ->
	s { Annex.branchstate = changer (Annex.branchstate s) }

{- Runs an action to check that the index file exists, if it's not been
 - checked before in this run of git-annex. -}
checkIndexOnce :: Annex () -> Annex ()
checkIndexOnce a = unlessM (indexChecked <$> getState) $ do
	a
	changeState $ \s -> s { indexChecked = True }

{- Runs an action to update the branch, if it's not been updated before
 - in this run of git-annex.
 -
 - The action should return True if anything that was in the journal
 - before got staged (or if the journal was empty). That lets an optimisation
 - be done: The journal then does not need to be checked going forward,
 - until new information gets written to it.
 -
 - When interactive access is enabled, the journal is always checked when
 - reading values from the branch, and so this does not need to update
 - the branch.
 -}
runUpdateOnce :: Annex Bool -> Annex BranchState
runUpdateOnce a = do
	st <- getState
	if branchUpdated st || needInteractiveAccess st
		then return st
		else do
			journalstaged <- a
			let stf = \st' -> st'
				{ branchUpdated = True
				, journalIgnorable = journalstaged
				}
			changeState stf
			return (stf st)
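
{- A minimal sketch of how a caller is expected to use runUpdateOnce
 - (all names below are hypothetical; the real call sites live in the
 - branch reading code, not in this module):
 -
 - > readValue file = do
 - > 	st <- runUpdateOnce stageJournalSomehow
 - > 	if journalIgnorable st
 - > 		then readFromBranch file
 - > 		else readJournalAndBranch file
 -}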

{- Avoids updating the branch. A useful optimisation when the branch
 - is known to have not changed, or git-annex won't be relying on info
 - queried from it being as up-to-date as possible. -}
disableUpdate :: Annex ()
disableUpdate = changeState $ \s -> s { branchUpdated = True }

{- Called when a change is made to the journal. -}
journalChanged :: Annex ()
journalChanged = do
	-- Optimisation: Typically journalIgnorable will already be False
	-- (when one thing gets journalled, often other things do too),
	-- so avoid an unnecessary write to the MVar that changeState
	-- would do.
	--
	-- This assumes that another thread is not changing journalIgnorable
	-- at the same time, but since runUpdateOnce is the only
	-- thing that changes it, and it only runs once, that
	-- should not happen.
	st <- getState
	when (journalIgnorable st) $
		changeState $ \st' -> st' { journalIgnorable = False }

{- When git-annex is somehow interactive, eg in --batch mode,
 - and needs to always notice changes made to the journal by other
 - processes, this disables the optimisations that normally avoid
 - reading the journal.
 -
 - It also avoids using the cache, so changes committed by other processes
 - will be seen.
 -}
enableInteractiveBranchAccess :: Annex ()
enableInteractiveBranchAccess = changeState $
	\s -> s { needInteractiveAccess = True }
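
{- The functions below maintain a small cache of the contents of recently
 - accessed log files, ordered with the most recently accessed file first.
 - Keeping the list short optimises the common case of the same log file
 - being read a few times in a row (eg the location log during a get,
 - examine, set sequence), while keeping the cost of scanning the cache
 - low. Its length is bounded by logFilesToCache. -}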

setCache :: RawFilePath -> L.ByteString -> Annex ()
setCache file content = changeState $ \s -> s
	{ cachedFileContents = add (cachedFileContents s) }
  where
	add l
		| length l < logFilesToCache = (file, content) : l
		| otherwise = (file, content) : Prelude.init l

getCache :: RawFilePath -> Annex (Maybe L.ByteString)
getCache file = (\st -> go (cachedFileContents st) st) <$> getState
  where
	go [] _ = Nothing
	go ((f,c):rest) state
		| f == file && not (needInteractiveAccess state) = Just c
		| otherwise = go rest state
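
{- An access-pattern sketch (hypothetical file name, not a real call site):
 -
 - > do
 - > 	setCache someLogFile content
 - > 	mc <- getCache someLogFile
 - > 	-- mc is Just content, unless interactive branch access is
 - > 	-- enabled, in which case getCache deliberately misses.
 -}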

invalidateCache :: Annex ()
invalidateCache = changeState $ \s -> s { cachedFileContents = [] }