cache one more log file for metadata
My worry was that a preferred content expression that matches on metadata would have removed the location log from cache, causing an expensive re-read when a Seek action later checked the location log. Especially when the --all optimisation in the previous commit pre-cached the location log. This also means that the --all optimisation could cache the metadata log too, if it wanted to, but that is not currently done. The cache is a list, with the most recently accessed file first. That optimises it for the common case of reading the same file twice, e.g. a get, examine, followed by set reads it twice. And sync --content commonly reads the location log 3 times in a row. But, as a list, it should not be made too long. I thought about expanding it to 5 items, but that seemed unlikely to be a win commonly enough to outweigh the extra time spent checking the cache. Clearly there could be some further benchmarking and tuning here.
This commit is contained in:
parent
d010ab04be
commit
9483b10469
3 changed files with 35 additions and 20 deletions
|
@ -12,6 +12,7 @@ module Annex.BranchState where
|
|||
import Annex.Common
|
||||
import Types.BranchState
|
||||
import qualified Annex
|
||||
import Logs
|
||||
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
|
@ -88,22 +89,19 @@ enableInteractiveBranchAccess = changeState $
|
|||
|
||||
setCache :: RawFilePath -> L.ByteString -> Annex ()
|
||||
setCache file content = changeState $ \s -> s
|
||||
{ cachedFile = Just file
|
||||
, cachedContent = content
|
||||
}
|
||||
{ cachedFileContents = add (cachedFileContents s) }
|
||||
where
|
||||
add l
|
||||
| length l < logFilesToCache = (file, content) : l
|
||||
| otherwise = (file, content) : Prelude.init l
|
||||
|
||||
getCache :: RawFilePath -> Annex (Maybe L.ByteString)
|
||||
getCache file = go <$> getState
|
||||
getCache file = (\st -> go (cachedFileContents st) st) <$> getState
|
||||
where
|
||||
go state
|
||||
| cachedFile state == Just file
|
||||
&& not (needInteractiveAccess state) =
|
||||
Just (cachedContent state)
|
||||
| otherwise = Nothing
|
||||
go [] _ = Nothing
|
||||
go ((f,c):rest) state
|
||||
| f == file && not (needInteractiveAccess state) = Just c
|
||||
| otherwise = go rest state
|
||||
|
||||
invalidateCache :: Annex ()
|
||||
invalidateCache = changeState $ \s -> s
|
||||
{ cachedFile = Nothing
|
||||
, cachedContent = mempty
|
||||
}
|
||||
|
||||
invalidateCache = changeState $ \s -> s { cachedFileContents = [] }
|
||||
|
|
21
Logs.hs
21
Logs.hs
|
@ -38,6 +38,26 @@ getLogVariety config f
|
|||
| isMetaDataLog f || f `elem` otherLogs = Just OtherLog
|
||||
| otherwise = PresenceLog <$> firstJust (presenceLogs config f)
|
||||
|
||||
{- Typical number of log files that may be read while processing a single
|
||||
- key. This is used to size a cache.
|
||||
-
|
||||
- The location log is generally read, and the metadata log is read when
|
||||
- matching a preferred content expression that matches on metadata,
|
||||
- or when using metadata options.
|
||||
-
|
||||
- When using a remote, the url log, chunk log, remote state log, remote
|
||||
- metadata log, and remote content identifier log might each be used,
|
||||
- but probably at most 3 out of the 6. However, caching too much slows
|
||||
- down all operations because the cache is a linear list, so the cache
|
||||
- is not currently sized to include these.
|
||||
-
|
||||
- The result is that when seeking for files to operate on,
|
||||
- the location log will stay in the cache if the metadata log is also
|
||||
- read.
|
||||
-}
|
||||
logFilesToCache :: Int
|
||||
logFilesToCache = 2
|
||||
|
||||
{- All the old-format uuid-based logs stored in the top of the git-annex branch. -}
|
||||
topLevelOldUUIDBasedLogs :: [RawFilePath]
|
||||
topLevelOldUUIDBasedLogs =
|
||||
|
@ -59,7 +79,6 @@ topLevelNewUUIDBasedLogs =
|
|||
[ exportLog
|
||||
]
|
||||
|
||||
|
||||
{- All the ways to get a key from a presence log file -}
|
||||
presenceLogs :: GitConfig -> RawFilePath -> [Maybe Key]
|
||||
presenceLogs config f =
|
||||
|
|
|
@ -19,10 +19,8 @@ data BranchState = BranchState
|
|||
, journalIgnorable :: Bool
|
||||
-- ^ can reading the journal be skipped, while still getting
|
||||
-- sufficiently up-to-date information from the branch?
|
||||
, cachedFile :: Maybe RawFilePath
|
||||
-- ^ a file recently read from the branch
|
||||
, cachedContent :: L.ByteString
|
||||
-- ^ content of the cachedFile
|
||||
, cachedFileContents :: [(RawFilePath, L.ByteString)]
|
||||
-- ^ contents of a few files recently read from the branch
|
||||
, needInteractiveAccess :: Bool
|
||||
-- ^ do new changes written to the journal or branch by another
|
||||
-- process need to be noticed while the current process is running?
|
||||
|
@ -31,4 +29,4 @@ data BranchState = BranchState
|
|||
}
|
||||
|
||||
startBranchState :: BranchState
|
||||
startBranchState = BranchState False False False Nothing mempty False
|
||||
startBranchState = BranchState False False False [] False
|
||||
|
|
Loading…
Reference in a new issue