cache one more log file for metadata
My worry was that a preferred content expression that matches on metadata would have removed the location log from the cache, causing an expensive re-read when a Seek action later checked the location log. Especially when the --all optimisation in the previous commit pre-cached the location log. This also means that the --all optimisation could cache the metadata log too, if it wanted to, but that is not currently done. The cache is a list, with the most recently accessed file first. That optimises it for the common case of reading the same file twice, e.g. a get, examine, followed by set reads it twice. And sync --content commonly reads the location log 3 times in a row. But, as a list, it should not be made too long. I thought about expanding it to 5 items, but that seemed unlikely to be a win commonly enough to outweigh the extra time spent checking the cache. Clearly there could be some further benchmarking and tuning here.
This commit is contained in:
parent
d010ab04be
commit
9483b10469
3 changed files with 35 additions and 20 deletions
|
@ -12,6 +12,7 @@ module Annex.BranchState where
|
||||||
import Annex.Common
|
import Annex.Common
|
||||||
import Types.BranchState
|
import Types.BranchState
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
|
import Logs
|
||||||
|
|
||||||
import qualified Data.ByteString.Lazy as L
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
|
||||||
|
@ -88,22 +89,19 @@ enableInteractiveBranchAccess = changeState $
|
||||||
|
|
||||||
setCache :: RawFilePath -> L.ByteString -> Annex ()
|
setCache :: RawFilePath -> L.ByteString -> Annex ()
|
||||||
setCache file content = changeState $ \s -> s
|
setCache file content = changeState $ \s -> s
|
||||||
{ cachedFile = Just file
|
{ cachedFileContents = add (cachedFileContents s) }
|
||||||
, cachedContent = content
|
where
|
||||||
}
|
add l
|
||||||
|
| length l < logFilesToCache = (file, content) : l
|
||||||
|
| otherwise = (file, content) : Prelude.init l
|
||||||
|
|
||||||
getCache :: RawFilePath -> Annex (Maybe L.ByteString)
|
getCache :: RawFilePath -> Annex (Maybe L.ByteString)
|
||||||
getCache file = go <$> getState
|
getCache file = (\st -> go (cachedFileContents st) st) <$> getState
|
||||||
where
|
where
|
||||||
go state
|
go [] _ = Nothing
|
||||||
| cachedFile state == Just file
|
go ((f,c):rest) state
|
||||||
&& not (needInteractiveAccess state) =
|
| f == file && not (needInteractiveAccess state) = Just c
|
||||||
Just (cachedContent state)
|
| otherwise = go rest state
|
||||||
| otherwise = Nothing
|
|
||||||
|
|
||||||
invalidateCache :: Annex ()
|
invalidateCache :: Annex ()
|
||||||
invalidateCache = changeState $ \s -> s
|
invalidateCache = changeState $ \s -> s { cachedFileContents = [] }
|
||||||
{ cachedFile = Nothing
|
|
||||||
, cachedContent = mempty
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
21
Logs.hs
21
Logs.hs
|
@ -38,6 +38,26 @@ getLogVariety config f
|
||||||
| isMetaDataLog f || f `elem` otherLogs = Just OtherLog
|
| isMetaDataLog f || f `elem` otherLogs = Just OtherLog
|
||||||
| otherwise = PresenceLog <$> firstJust (presenceLogs config f)
|
| otherwise = PresenceLog <$> firstJust (presenceLogs config f)
|
||||||
|
|
||||||
|
{- Typical number of log files that may be read while processing a single
|
||||||
|
- key. This is used to size a cache.
|
||||||
|
-
|
||||||
|
- The location log is generally read, and the metadata log is read when
|
||||||
|
- matching a preferred content expression that matches on metadata,
|
||||||
|
- or when using metadata options.
|
||||||
|
-
|
||||||
|
- When using a remote, the url log, chunk log, remote state log, remote
|
||||||
|
- metadata log, and remote content identifier log might each be used,
|
||||||
|
- but probably at most 3 out of the 6. However, caching too much slows
|
||||||
|
- down all operations because the cache is a linear list, so the cache
|
||||||
|
- is not currently sized to include these.
|
||||||
|
-
|
||||||
|
- The result is that when seeking for files to operate on,
|
||||||
|
- the location log will stay in the cache if the metadata log is also
|
||||||
|
- read.
|
||||||
|
-}
|
||||||
|
logFilesToCache :: Int
|
||||||
|
logFilesToCache = 2
|
||||||
|
|
||||||
{- All the old-format uuid-based logs stored in the top of the git-annex branch. -}
|
{- All the old-format uuid-based logs stored in the top of the git-annex branch. -}
|
||||||
topLevelOldUUIDBasedLogs :: [RawFilePath]
|
topLevelOldUUIDBasedLogs :: [RawFilePath]
|
||||||
topLevelOldUUIDBasedLogs =
|
topLevelOldUUIDBasedLogs =
|
||||||
|
@ -59,7 +79,6 @@ topLevelNewUUIDBasedLogs =
|
||||||
[ exportLog
|
[ exportLog
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
{- All the ways to get a key from a presence log file -}
|
{- All the ways to get a key from a presence log file -}
|
||||||
presenceLogs :: GitConfig -> RawFilePath -> [Maybe Key]
|
presenceLogs :: GitConfig -> RawFilePath -> [Maybe Key]
|
||||||
presenceLogs config f =
|
presenceLogs config f =
|
||||||
|
|
|
@ -19,10 +19,8 @@ data BranchState = BranchState
|
||||||
, journalIgnorable :: Bool
|
, journalIgnorable :: Bool
|
||||||
-- ^ can reading the journal be skipped, while still getting
|
-- ^ can reading the journal be skipped, while still getting
|
||||||
-- sufficiently up-to-date information from the branch?
|
-- sufficiently up-to-date information from the branch?
|
||||||
, cachedFile :: Maybe RawFilePath
|
, cachedFileContents :: [(RawFilePath, L.ByteString)]
|
||||||
-- ^ a file recently read from the branch
|
-- ^ contents of a few files recently read from the branch
|
||||||
, cachedContent :: L.ByteString
|
|
||||||
-- ^ content of the cachedFile
|
|
||||||
, needInteractiveAccess :: Bool
|
, needInteractiveAccess :: Bool
|
||||||
-- ^ do new changes written to the journal or branch by another
|
-- ^ do new changes written to the journal or branch by another
|
||||||
-- process need to be noticed while the current process is running?
|
-- process need to be noticed while the current process is running?
|
||||||
|
@ -31,4 +29,4 @@ data BranchState = BranchState
|
||||||
}
|
}
|
||||||
|
|
||||||
startBranchState :: BranchState
|
startBranchState :: BranchState
|
||||||
startBranchState = BranchState False False False Nothing mempty False
|
startBranchState = BranchState False False False [] False
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue